Hallvard's Blog: Win32

Showing posts with label Win32. Show all posts

Sunday, March 30, 2008

TDM#10: BorDebug – Return of the Giant

"The Delphi linker has always had the option of including so-called Turbo Debugger (TD32) debug information (on the Linker page of the Project Options dialog). The internal IDE Debugger does not normally use this information (Delphi 4 and 5 uses it when debugging external DLLs and EXE files), but instead relies on internal compiler structures build during an interactive compile.
External tools such as Borland’s Turbo Debugger[i] does rely on the TD32 information tacked at the end of the EXE or DLL file to enable symbolic debugging. This information is also used by a number of third party tools such as Numega’s BoundsChecker[ii], Turbo Power’s suite of Sleuth QA[iii] tools, Atanas Stoyanov’s freeware MemProof memory checking tool[iv], AutomatedQA’s QTime profiler[v], and Intel’s VTune sampling profiler [vi]
In this article we will see how we can utilise a relatively new DLL from Borland, BorDebug.DLL, to read and interpret the TD32 debug information in our own applications. [...]
We will discuss the functionality provided by the BorDebug DLL, present an import unit that gives us access to it from our Delphi applications, look at a set of wrapper classes to simplify the usage and show some simple demonstration programs."
H. Vassbotn, TDM, November 2000

unit HVBorDebug;
{
    Simplified class interface for the BorDebug API
    Written by Hallvard Vassbotn (hallvard.vassbotn@c2i.net),
    April 1999 - September 2000

    History:
    15.04.99 HV Created
    30.09.00 HV Updated and revised
}
interface

uses
 Windows, Classes, TypInfo, BorDebug, SysUtils;

type
 // ... removed a lot of stuff - see the code on disk
 TBorDebug = class(TObject)
 public
   constructor Create(const aFilename: string = '');
   destructor Destroy; override;
   procedure Open;
   procedure Close;
   property Handle: TBorDebHandle read GetHandle;
   property FileName: string;
   property SkipNames: boolean;
   property CacheNames: boolean;
   property Active: boolean;
   property NameCount: TItemCount;
   property Names[Index: TNameIndex]: string;
   property UnmangledNames[Index: TNameIndex]: string;
   property RegisterName[RegIndex: TRegNameIndex]: string;
   property SubSectionCount: TItemCount;
   property SubSections[Index: TSubSectionIndex]: TBorDebugSubSection;
   function CreateModule(const SubSection: TBorDebugSubSection): TBorDebugModule;
   function CreateSrcModule(const SubSection: TBorDebugSubSection ): TBorDebugSrcModule;
   procedure StartSymbols(const SubSection: TBorDebugSubSection);
   function GetNextSymbol(var Symbol: TBorDebugSymbol): boolean;
   function CreateSymbolInfo(const Symbol: TBorDebugSymbol): TSymbolInfo;
   function CreateTypeInfo(const aType: TBorDebugType): TTypeInfo;
   property TypeFromIndex[TypeIndex: TTypeIndex]: TBorDebugType;
   property TypeFromOffset[Offset: TFileOffset]: TBorDebugType;
   property TypeName[TypeIndex: TTypeIndex]: string;
   property GlobalSymbols[const SubSection: TBorDebugSubSection]: TBorDebugGlobalSymbol;
   property TypeCount: TItemCount;
   property TypesSignature: TSignature;
   property SubSectionDirectoryOffset: TFileOffset;
 end;

 TBorDebugModule = class(TBorDebugObject)
 public
   constructor Create(BorDebug: TBorDebug; Offset: TFileOffset);
   destructor Destroy; override;
   property Overlay            : TOverlayIndex  ;
   property LibIndex           : TLibraryIndex  ;
   property Style              : TDebuggingStyle;
   property TimeStamp          : TBDTimeStamp   ;
   property SegmentCount       : TItemCount     ;
   property NameIndex          : TNameIndex     ;
   property Name               : string         ;
   property ModuleSegmentList  : TList          ;
   property Segments[Index: integer]: TModuleSegment;
 end;

 TBorDebugSrcModule = class(TBorDebugObject)
 public
   constructor Create(BorDebug: TBorDebug; Offset: TFileOffset);
   destructor Destroy; override;
   property RangeCount         : TItemCount     ;
   property RangeSegments      : PSegmentIndices;
   property RangeSegmentStarts : PSegmentOffsets;
   property RangeSegmentEnds   : PSegmentOffsets;
   property SourceCount        : TItemCount     ;
   property SourceOffsets      : PFileOffsets   ;
   property NameIndices        : PNameIndices   ;
   property RangeCounts        : PItemCounts    ;
   property SourceFileList     : TList          ;
   property SourceFiles[Index: integer]: TSourceFileEntry;
   property SourceNames[Index: integer]: string;
 end;

 TModuleSegment = class(TObject)
 public
   constructor Create(Module: TBorDebugModule; SegmentIndex: TSegmentIndex);
   property LinkerSegment     : TLinkerSegmentIndex;
   property Offset            : TFileOffset        ;
   property Size              : TByteCount         ;
   property Flags             : TSegmentFlags      ;
 end;

 TSourceFileEntry = class(TObject)
 public
   constructor Create(SrcModule: TBorDebugSrcModule; SourceFileIndex: TSourceFileIndex);
   destructor Destroy; override;
   property BorDebug           : TBorDebug       ;
   property Handle             : TBorDebHandle   ;
   property Offset             : TFileOffset     ;
   property SrcModule          : TBorDebugSrcModule;
   property Name               : string          ;
   property NameIndex          : TNameIndex      ;
   property SourceFileIndex    : TSourceFileIndex;
   property RangeSegments      : PSegmentIndices ;
   property RangeSegmentStarts : PSegmentOffsets ;
   property RangeSegmentEnds   : PSegmentOffsets ;
   property LineNumberCounts   : PItemCounts     ;
   property LineNumerOffsetList: TList           ;
   property RangeCount         : TItemCount      ;
   property RangeLineNumbers[Index: integer]: TLineNumberOffsets;
 end;

 TLineNumberOffsets = class(TObject)
 public
   constructor Create(SourceFile: TSourceFileEntry; RangeIndex: TRangeIndex);
   destructor Destroy; override;
   property SourceFile  : TSourceFileEntry;
   property LineNumbers : PLineNumbers    ;
   property LineOffsets : PSegmentOffsets ;
   property LineCount   : TItemCount      ;
   property RangeIndex  : TRangeIndex     ;
 end;

 TSymbolInfo = class(TObject)
 public
   constructor Create(BorDebug: TBorDebug; Symbol: TBorDebugSymbol);
   destructor Destroy; override;
   function GetTypeIndex(var TypeIndex: TTypeIndex): boolean;
   function GetNameIndex(var NameIndex: TNameIndex): boolean;
   property Symbol        : TBorDebugSymbol;
   property SymbolOffset  : TFileOffset    ;
   property Len           : TByteCount     ;
   property Kind          : TSymbolKind    ;
   property Info          : TSymbolInfoRec ;
   property KindAsString  : string         ;
 end;

 TTypeInfo = class(TObject)
 public
   constructor Create(BorDebug: TBorDebug; aType: TBorDebugType);
   destructor Destroy; override;
   property BDType        : TBorDebugType  ;
   property TypeIndex     : TTypeIndex     ;
   property TypeOffset    : TFileOffset    ;
   property Length        : TByteCount     ;
   property TypeKind      : TTypeKind      ;
   property Info          : TTypeInfoRec   ;
   property NameIndex     : TNameIndex     ;
   property KindAsString  : string         ;
 end;

As usual you can read the full article (PDF) and download the full, original code (zip). Enjoy! ;)

[i] Turbo Debugger, a free download: http://www.borland.com/bcppbuilder/turbodebugger/

[ii] Numega BoundsChecker for Delphi: http://www.numega.com/products/aed/del.shtml

[iii] Turbo Power Sleuth QA Suite: http://www.turbopower.com/products/sleuthqa/

[iv] Atanas Stoyanov’s MemProof home page: http://www.totalqa.com/downloads/memproof.asp

[v] QTime: http://www.totalqa.com/index.asp

[vi] VTune: http://developer.intel.com/vtune/

Saturday, March 15, 2008

TDM#9: Exceptional Stack Tracing (HVEST)

One of the key questions you should ask yourself as a serious Delphi developer is; what kind of exception handling and logging am I using. If you're not using any custom or third party solution for tracking down exceptional incidents that occur in your production systems or at your customer sites, you're missing out big time!

A proper exception handling and logging system should at least log the calling context (the calls that lead up to the exception) in the form of a stack trace. This makes it so much easier to track down, identify and fix the cause of the problem.

In 1999 I wrote such a tool and published it in The Delphi Magazine article Exceptional Stack Tracing in October 1999. As I have mentioned before, parts of this tool is based on my earlier work on 16-bit stack tracer, YAST. It also uses the excellent RTLI (run-time line information) tool by Vitaly Miryanov. At the time I was inspired by Per Larsen's ExHook32 and Stefan Hoffmeister's Debug Mapper. So I upgraded and improved the stack tracer for Win32, integrated the RTLI code and researched and developed a general implicit DLL import hooking system and a specific exception notification mechanism.

Putting all the pieces together we were able to get meaningful symbolic stack traces from any exceptional error incident - wether it happened during development, testing or at the customer's site. This made it an order of magnitude easier and faster to identify and fix bugs that caused the exception (or to handle it more gracefully).

I always spent a fair amount of time on my articles, but this one was by far the most time-consuming. Here are some key excerpts from the article.

"Often, during beta testing of an application (and, horrors, sometimes in a release version), users will encounter bugs in the form of exceptions (both logical such as EConvertError and hardware such as EAccessViolation). The tricky part is that only address of where the exception occurred is reported by the default Delphi exception handler. This is more often than not, less than helpful. Typically, that address will map to a line deep inside the VCL or RTL. What we’re really interested in is how we ended up there in the first place with invalid parameters (i.e. a blank string or a nil pointer). To get that we would need a complete stack trace of the calls that ended up in the exception being raised.

This article is about developing such an exception stack tracer. Not only will it show a complete stack trace leading up to an exception, but in the presence of so-called Run-Time Location Information (RTLI), it will also give a complete symbolic stack trace. "

"I remember reading an excellent article about PE files by Matt Pietrek^[i]. In it he describes how implicit linking to external DLLs work. About the import address table, he says:

"Since the import address table is in a writeable section, it's relatively easy to intercept calls that an EXE or DLL makes to another DLL.. Simply patch the appropriate import address table entry to point at the desired interception function. There's no need to modify any code in either the caller or callee images. What could be easier?"

Yeah, what could be easier <g>? Such a statement just screams: “Implement me!”. We will implement a completely general way of hooking any routine in any implicitly loaded DLL. We can then use this technique to hook the Kernel32.RaiseException routine that is called from System._RaiseExcept. "

The key routine for hooking DLL imports is listed below:

function IsWin95CallThunk(Thunk: PWin95CallThunk): boolean;
begin
  Result := (Thunk^.PUSH = $68) and (Thunk^.JMP = $E9);
end;
 
function ReplaceImport(Base: Pointer; ModuleName: PChar; FromProc, ToProc: pointer): boolean;
var
  NtHeader          : PImageNtHeaders;
  ImportDescriptor  : PImageImportDescriptor;
  ImportEntry       : PImageThunkData;
  CurrModuleName    : PChar;
  IsThunked         : Boolean;
  FromProcThunk     : PWin95CallThunk;
  ImportThunk       : PWin95CallThunk;
  FoundProc         : boolean;
begin
  Result := false;
  FromProcThunk := PWin95CallThunk(FromProc);
  IsThunked := (Win32Platform = VER_PLATFORM_WIN32_WINDOWS) and
               IsWin95CallThunk(FromProcThunk);
  NtHeader := GetImageNtHeader(Base);
  ImportDescriptor := PImageImportDescriptor(DWORD(Base)+
    NtHeader.OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
  while ImportDescriptor^.NameOffset <> 0 do
  begin
    CurrModuleName := PChar(Base) + ImportDescriptor^.NameOffset;
    if StrIComp(CurrModuleName, ModuleName) = 0 then
    begin
      ImportEntry := PImageThunkData(DWORD(Base) + ImportDescriptor^.IATOffset);
      while ImportEntry^.FunctionPtr <> nil do
      begin
        if IsThunked then
        begin
          ImportThunk := PWin95CallThunk(ImportEntry^.FunctionPtr);
          FoundProc := IsWin95CallThunk(ImportThunk) and
                       (ImportThunk^.Addr = FromProcThunk^.Addr);
        end
        else
          FoundProc := (ImportEntry^.FunctionPtr = FromProc);
        if FoundProc then
        begin
          ImportEntry^.FunctionPtr := ToProc;
          Result := true;
        end;
        Inc(ImportEntry);
      end;
    end;
    Inc(ImportDescriptor);
  end;
end;

"Hooking RaiseException

Now that we have the HVHookDLL, it is very easy to hook the RaiseException routine in Kernel32.DLL – take a look at [the code below]"

unit HVExceptNotify;
// Unit that provides a notification service when exceptions are being raised
//
// Written by Hallvard Vassbotn, hallvard@balder.no, September 1999
interface
 
type
  TExceptNotify = procedure (ExceptObj: TObject; ExceptAddr: pointer; OSException: boolean);
var
  ExceptNotify: TExceptNotify;
 
implementation
 
uses
  Windows,
  SysUtils,
  HVHookDLL;
 
var
  Kernel32_RaiseException : procedure (dwExceptionCode, dwExceptionFlags, nNumberOfArguments: DWORD;
  lpArguments: PDWORD); stdcall;
 
type
  PExceptionArguments = ^TExceptionArguments;
  TExceptionArguments = record
    ExceptAddr: pointer;
    ExceptObj : TObject;
  end;
 
procedure HookedRaiseException(ExceptionCode, ExceptionFlags, NumberOfArguments: DWORD;
  Arguments: PExceptionArguments); stdcall;
// All calls to Kernel32.RaiseException ends up here
const
  // D2 has a different signature for Delphi exceptions
  cDelphiException    = {$IFDEF VER90}$0EEDFACE{$ELSE}$0EEDFADE{$ENDIF};
  cNonContinuable     = 1;
begin
  // We're only interested in Delphi exceptions raised from System's
  // internal _RaiseExcept routine
  if (ExceptionFlags    = cNonContinuable)       and
     (ExceptionCode     = cDelphiException)      and
     (NumberOfArguments = 7)                     and
     (DWORD(Arguments)  = DWORD(@Arguments) + 4) then
  begin
    // Run the event if it has been assigned
    if Assigned(ExceptNotify) then
      ExceptNotify(Arguments.ExceptObj, Arguments.ExceptAddr, false);
  end;
  // Call the original routine in Kernel32.DLL
  Kernel32_RaiseException(ExceptionCode, ExceptionFlags, NumberOfArguments, PDWORD(Arguments));
end;
 
var
  SysUtils_ExceptObjProc: function (P: PExceptionRecord): Exception;
 
function HookedExceptObjProc(P: PExceptionRecord): Exception;
begin
  // Non-Delphi exceptions such as AVs, OS and hardware exceptions
  // end up here. This routine is normally responsible for creating
  // a Delphi Exception object corresponding to the OS-level exception
  // described in the TExceptionRecord structure.
  //
  // We leave the mapping to the standard SysUtils routine,
  // but hook this to know about the exception and call our
  // event.

  // First call the original mapping function in SysUtils
  Result := SysUtils_ExceptObjProc(P);

  // Run the event if it has been assigned
  if Assigned(ExceptNotify) then
    ExceptNotify(Result, P^.ExceptionAddress, true);
end;
 
function GetRaiseExceptAddr: pointer;
asm
  LEA EAX, System.@RaiseExcept;
end;
 
initialization
  SysUtils_ExceptObjProc := System.ExceptObjProc;
  System.ExceptObjProc := @HookedExceptObjProc;
  HookImport(Pointer(FindHInstance(GetRaiseExceptAddr)), 'Kernel32.dll', 'RaiseException', @HookedRaiseException, @Kernel32_RaiseException)
 
finalization
  UnHookImport(Pointer(FindHInstance(GetRaiseExceptAddr)), 'Kernel32.dll', 'RaiseException', @HookedRaiseException, @Kernel32_RaiseException);
  System.ExceptObjProc := @SysUtils_ExceptObjProc;
  SysUtils_ExceptObjProc := nil;
 
end.

"The definition of what might be useful context information can vary according to what kind of application you are developing. The name of the currently focused form, the name of active database tables, the name of the logged in user and other global information might be useful. You can easily add any such value-added information yourself.
However, in all cases, a complete overview of the function calls that preceded the raised exception will be most useful. To get that, we have to implement something called a stack tracer. A stack tracer will analyze the current contents of the stack and try to figure out the return addresses stored there by the CPU as part of the CALL instruction operation.
YAST Nostalgia

[...] I have now converted [the 16-bit YAST stack tracer] to a 32-bit version and added some bells and whistles along the way – see the [code below]"

unit HVYAST32;
// Yet-Another-Stack-Tracer, 32-bit version
//
// Loosely based on my 16-bit YAST code published in
// The Delphi Magazine, issue 7.
//
// Description: A general call-back based stack-trace utility.
// Both stack frames based and raw stack tracing is supported.
//
// Written by Hallvard Vassbotn, hallvard@balder.no, July 1999
//
interface
 
uses
  Windows,
  SysUtils;
 
// The generic stack tracing machinery
 
const
  MaxBlock = MaxInt-$f;
type
  PBytes  = ^TBytes;
  TBytes  = array[0..MaxBlock div SizeOf(byte)] of byte;
  PDWORDS = ^TDWORDS;
  TDWORDS = array[0..MaxBlock div SizeOf(DWORD)] of DWORD;
  PStackFrame = ^TStackFrame;
  TStackFrame = record
    CallersEBP : DWORD;
    CallerAdr  : DWORD;
  end;
  TStackInfo = record
    CallerAdr  : DWORD;
    Level      : DWORD;
    CallersEBP : DWORD;
    DumpSize   : DWORD;
    ParamSize  : DWORD;
    ParamPtr   : PDWORDS;
    case integer of
     0 : (StackFrame : PStackFrame);
     1 : (DumpPtr    : PBytes);
  end;
  TReportStackFrame = function(var StackInfo: TStackInfo; PrivateData: Pointer): boolean;
 
procedure TraceStackFrames(ReportStackFrame: TReportStackFrame; PrivateData: Pointer);
procedure TraceStackRaw(ReportStackFrame: TReportStackFrame; PrivateData: Pointer);
 
// Default stack tracer
 
const
  MaxStackLevels = 50;
type
  TStackInfoArray = array[0..MaxStackLevels-1] of TStackInfo;
var
  StackDump: TStackInfoArray;
  StackDumpCount: integer;
 
function PhysicalToLogical(Physical: DWORD): DWORD;
function DefaultReportStackFrame(var StackInfo: TStackInfo; PrivateData: Pointer): boolean;
procedure SaveStackTrace(Raw: boolean; IgnoreLevels: integer; FirstCaller: pointer);
 
implementation
 
uses
  HVPEUtils;
 
{$W-} // This routine should not have a EBP stack frame
function GetEBP: pointer;
// Return the current contents of the EBP register
asm
  MOV EAX, EBP
end;
 
function GetESP: pointer;
// Return the current contents of the ESP register
asm
  MOV EAX, ESP
end;
 
function GetStackTop: DWORD;
asm
  // Pick up the top of the stack from the Thread Information Block (TIB)
  // pointed to by the FS segment register.
  //
  // Reference: Matt Pietrek, MSJ, Under the hood, on TIBs:
  // PVOID pvStackUserTop  // 04h Top of user stack
  // http:{msdn.microsoft.com/library/periodic/period96/periodic/msj/F1/D6/S2CE.htm }
  //
  MOV EAX, FS:[4]
end;
 
var
  TopOfStack : DWORD;
  BaseOfStack: DWORD;
  BaseOfCode : DWORD;
  TopOfCode  : DWORD;
 
procedure InitGlobalVars;
var
  NTHeader: PImageNTHeaders;
begin
  { Get pointers into the EXE file image }
  if BaseOfCode = 0 then
  begin
    NTHeader := GetImageNtHeader(Pointer(hInstance));
    BaseOfCode := DWord(hInstance) + NTHeader.OptionalHeader.BaseOfCode;
    TopOfCode := BaseOfCode + NTHeader.OptionalHeader.SizeOfCode;
    TopOfStack := GetStackTop;
  end;
end;
 
function ValidStackAddr(StackAddr: DWORD): boolean;
begin
  Result := (BaseOfStack < StackAddr) and (StackAddr < TopOfStack);
end;
 
function ValidCodeAddr(CodeAddr: DWORD): boolean;
begin
  Result := (BaseOfCode < CodeAddr) and  (CodeAddr < TopOfCode);
end;
 
function ValidCallSite(CodeAddr: DWORD): boolean;
// Validate that the code address is a valid code site
//
// Information from Intel Manual 24319102(2).pdf, Download the 6.5 MBs from:
//  http://developer.intel.com/design/pentiumii/manuals/243191.htm
//  Instruction format, Chapter 2 and The CALL instruction: page 3-53, 3-54
var
  CodeDWORD4: DWORD;
  CodeDWORD8: DWORD;
begin
  // First check that the address is within range of our code segment! 
  Result := (BaseOfCode < CodeAddr) and  (CodeAddr < TopOfCode);

  // Now check to see if the instruction preceding the return address  
  // could be a valid CALL instruction 
  if Result then
  begin
    // Check the instruction prior to the potential call site. 
    // We consider it a valid call site if we find a CALL instruction there 
    // Check the most common CALL variants first 
    CodeDWORD8 := PDWORD(CodeAddr-8)^;
    CodeDWORD4 := PDWORD(CodeAddr-4)^;

    Result :=
          ((CodeDWORD8 and $FF000000) = $E8000000) // 5-byte, CALL [-$1234567] 
       or ((CodeDWORD4 and $38FF0000) = $10FF0000) // 2 byte, CALL EAX 
       or ((CodeDWORD4 and $0038FF00) = $0010FF00) // 3 byte, CALL [EBP+0x8] 
       or ((CodeDWORD4 and $000038FF) = $000010FF) // 4 byte, CALL ?? 
       or ((CodeDWORD8 and $38FF0000) = $10FF0000) // 6-byte, CALL ?? 
       or ((CodeDWORD8 and $0038FF00) = $0010FF00) // 7-byte, CALL [ESP-0x1234567] 
    // It is possible to simulate a CALL by doing a PUSH followed by RET, 
    // so we check for a RET just prior to the return address
       or ((CodeDWORD4 and $FF000000) = $C3000000);// PUSH XX, RET 

    // Because we're not doing a complete disassembly, we will potentially report
    // false positives. If there is odd code that uses the CALL 16:32 format, we 
    // can also get false negatives. 

  end;
end;
 
function NextStackFrame(var StackFrame: PStackFrame;
                        var StackInfo : TStackInfo): boolean;
begin
  // Only report this stack frame into the StackInfo structure 
  // if the StackFrame pointer, EBP on the stack and return 
  // address on the stack are valid addresses 
  while ValidStackAddr(DWORD(StackFrame)) do
  begin
    // CallerAdr within current process space, code segment etc. 
    if ValidCodeAddr(StackFrame^.CallerAdr) then
    begin
      Inc(StackInfo.Level);
      StackInfo.StackFrame := StackFrame;
      StackInfo.ParamPtr   := PDWORDS(DWORD(StackFrame) + SizeOf(TStackFrame));
      StackInfo.CallersEBP := StackFrame^.CallersEBP;
      StackInfo.CallerAdr  := StackFrame^.CallerAdr;
      StackInfo.DumpSize   := StackFrame^.CallersEBP - DWORD(StackFrame);
      StackInfo.ParamSize  := (StackInfo.DumpSize - SizeOf(TStackFrame)) div 4;
      // Step to the next stack frame by following the EBP pointer 
      StackFrame           := PStackFrame(StackFrame^.CallersEBP);
      Result := true;
      Exit;
    end;
    // Step to the next stack frame by following the EBP pointer
    StackFrame := PStackFrame(StackFrame^.CallersEBP);
  end;
  Result := false;
end;
 
{$W+} // We must have stack-frames on for this routine
 
procedure TraceStackFrames(ReportStackFrame: TReportStackFrame; PrivateData: Pointer);
var
  StackFrame : PStackFrame;
  StackInfo  : TStackInfo;
begin
  // Start at level 0 
  StackInfo.Level := 0;

  // Make sure the global variables are correctly set 
  InitGlobalVars;

  // Get the current stack from from the EBP register 
  StackFrame := GetEBP;

  // We define the bottom of the valid stack to be the current EBP Pointer 
  // There is a TIB field called pvStackUserBase, but this includes more of the 
  // stack than what would define valid stack frames. 
  BaseOfStack := DWORD(StackFrame) - 1;

  // Loop over and report all valid stackframes
  while NextStackFrame(StackFrame, StackInfo) and
        ReportStackFrame(StackInfo, PrivateData) do
    {Loop};
end;
 
procedure TraceStackRaw(ReportStackFrame: TReportStackFrame; PrivateData: Pointer);
var
  StackInfo : TStackInfo;
  StackPtr : PDWORD;
  PrevCaller: DWORD;
begin
  // We define the bottom of the valid stack to be the current ESP pointer
  BaseOfStack := DWORD(GetESP);

  // We will not be able to fill in all the fields in the StackInfo record,
  // so just blank it all out first
  FillChar(StackInfo, SizeOf(StackInfo), 0);

  // Make sure the global variables are correctly set
  InitGlobalVars;

  // Clear the previous call address
  PrevCaller := 0;

  // Get a pointer to the current bottom of the stack
  StackPtr := PDWORD(BaseOfStack);
 
  // Loop through all of the valid stack space
  while DWORD(StackPtr) < TopOfStack do
  begin

    // If the current DWORD on the stack,
    // refers to a valid call site...
    if ValidCallSite(StackPtr^) and (StackPtr^ <> PrevCaller) then
    begin
      // then pick up the callers address 
      StackInfo.CallerAdr := StackPtr^;

      // remeber to callers address so that we don't report it repeatedly 
      PrevCaller := StackPtr^;

      // increase the stack level 
      Inc(StackInfo.Level);

      // then report it back to our caller 
      if not ReportStackFrame(StackInfo, PrivateData) then
        Break;
    end;

    // Look at the next DWORD on the stack 
    Inc(StackPtr);
  end;
end;
 
function DefaultReportStackFrame(var StackInfo: TStackInfo; PrivateData: Pointer): boolean;
begin
  Result := (StackDumpCount < MaxStackLevels-1);
  if Result                                 and  // We have an available slot 
     (DWORD(PrivateData) < StackInfo.Level) then // We're not going to skip this level 
  begin
    // Save the contents of this stack frame
    StackDump[StackDumpCount] := StackInfo;
    Inc(StackDumpCount);
  end;
end;
 
procedure SaveStackTrace(Raw: boolean; IgnoreLevels: integer; FirstCaller: pointer);
begin
  FillChar(StackDump, SizeOf(StackDump), 0);
  StackDumpCount := 0;
  // Fill the first slot, if we are given an address directly
  if Assigned(FirstCaller) then
  begin
    StackDump[0].CallerAdr := DWORD(FirstCaller);
    StackDumpCount := 1;
  end;
  if Raw
  then TraceStackRaw   (DefaultReportStackFrame, Pointer(IgnoreLevels))
  else TraceStackFrames(DefaultReportStackFrame, Pointer(IgnoreLevels));
end;
 
const
  LinkerOffset = $1000;
 
function PhysicalToLogical(Physical: DWORD): DWORD;
begin
  Result :=   Physical
            - DWORD(HInstance)
            - LinkerOffset;
end;
 
end.

"To Stack Frame, or not to Stack Frame – that is the Question

There are generally two different types of algorithms to choose from when implementing a stack tracer: the more elegant stack frame based algorithm and the raw brute force algorithm.

[...]

The frame-based stack tracing is elegant and fairly fast, but it has one major weakness. It will not find callers that have no stack frames. With the current crop of optimising compilers, most smaller routines will not have stack frames and this reduces the usefulness of the stack tracer dramatically. There are two solutions to this. Either force stack frames for all your code – and preferably the VCL and RTL, too. Or use another algorithm.

[...]

[T]he brute-force method is much more primitive. The algorithm is very easy: just look at all the DWORDs stored on the stack. If a DWORD happens to be a value that falls within the valid code segment of this module, include it in the stack trace. To avoid getting too many false positives, we can add some more constraints."

"Dusting off the RTLI

While having the stack trace in hand is a great step in the right direction, it is still rather cumbersome having to locate the correct copy of the project’s MAP file (providing that we have it somewhere) and then start searching for each logical address from the stack trace.
Ideally, the stack trace itself should include symbolic information such as the unit name, filename, line number and routine name the logical address corresponds to. Thanks to Vitaly Miryanov and his RTLI^[ii], we get this wonderful capability almost for free. He has already developed the framework and set of routines to make this possible. We just have to tweak the code a little to make it work with the newer compiler versions."

HVEST was a step in the right direction and using it is certainly better than nothing. If you are already using it you your code today, by all means continue to do so. But time has moved on and there are now more mature solutions available - including the open source JclDebug (as part of the JCL library) and the commercial madExcept, Exceptional Magic and EurekaLog. JclDebug was in part based on my HVEST code and brought forward by Petr Vones and others. If you haven't already, you should seriously consider using one of these - you will not regret it, believe me! ;)

As usual you can read the full article (PDF) and download the full, original code (zip). Enjoy!

[i] Matt Pietrek, MSJ March 1994: Peering inside the PE: A Tour of the Win32 Portable Executable File Format: http://msdn.microsoft.com/library/techart/msdn_peeringpe.htm

[ii] Vitaly Miryanov, TDM Issue 22, June 1997, Run-Time Location Information In Delphi 2

Sunday, March 02, 2008

TDM#6: Knitting Your Own Threads

One of the key reasons that computers have conquered the world is that they have been following Moore's Law with faster, smaller and cheaper CPUs (and similar "laws" and improvements of memory, hard disks, graphics cards, etc) coming out every year.

Until recently, all programs have just become faster and faster due to improved hardware. This has been dubbed "the free lunch" and has given sloppy programmers the "hardware-will-catch-up" excuse to write slow and bloated software.

Well, no more (this DDJ piece by Herb Sutter is recommended reading). The speed of normal run-of-the-mill single-threaded code are not getting any speedups any more. The main reason for this is that CPU clock speeds have hit the ~3 Ghz speed limit. On the other hand, CPUs are getting better at doing things in parallel - by packing multiple cores in the same die, multiple threads of execution can run at the same time. To exploit this parallelism, programs have to somehow execute multiple threads - so called multithreaded programming.

Even in the classical case of a single CPU core, there can often be substantial benefits of dividing the work of a program into multiple threads. You can improve user interface responsiveness by off-loading some of the heavy lifting to a background thread, for instance, while the main thread is still happily updating the GUI.

Delphi has always (well, since Delphi 2) anyway had some level of support for writing multithreaded applications. There is the TThread class that encapsulates the concept of a separate thread of execution and there are wrappers for critical sections, mutexes, semaphores and events. Allen Bauer is doing an admirable job of thinking, designing, (re-)implementing and writing about an upcoming Delphi Parallel Library (DPL) in his blog.

Still, one of the major weaknesses of the current Delphi VCL thread support is that there is no (easy) way for the main thread to wait for a signal in a non-polling, non-blocking fashion. Another related issue is that there is no clean way for threads to communicate between themselves or the main thread. For performance and blocking reasons, the dreaded TThread.Synchronize method should be avoided.

These are the main points I raised in The Delphi Magazine article Knitting Your Own Threads, published in December 1998 (time sure is flying, that is almost 10 years ago now! :)). It also goes into some detail the various threading support in the compiler, RTL and VCL (we are still in the Delphi 4 era here). It talks about and digs into the implementation and shortcomings of:

threadvar
MainThreadID
IsMultiThread
BeginThread / EndThread
TThreadFunc
TThread (Execute, WaitFor, Synchronize)
TThreadList
TEvent
TCriticalSection

Then it goes on to try and fill in some of the shortcomings, giving implementations and wrappers for (some of these have been covered in later Delphi releases):

TSynchroObject
THandleObject
TNamedObject
TMutex
TSemaphore
TWaitableThreadList
TMultiThreadedMainLoop
MsgWaitForMultipleObjects (hooking Applicaton.OnIdle)
TSignalList

A few excerpts of the article and code:

"[The TWaitableThreadList] class can be used to administer a background working thread. The thread class will keep two TWaitableThreadLists, one InBox for work to be done and one OutBox for worked finished by the thread ready to be picked up by the main thread (or even another work thread for further processing),see example work flow in Figure 2. "

constructor TSignalList.Create;
begin
  inherited Create;
  FList := TList.Create;
  // See MsgWaitForMultipleObjects in help for list of possible values
  FMsgWakeupMask := QS_AllInput;
end;

destructor TSignalList.Destroy;
begin
  FreeOwningTList(FList);
  inherited Destroy;
end;

procedure TSignalList.AddSignal(aSignal: TCustomSignal);
begin
  // Check that we are not passing any limits (currently 64!)
  if FList.Count >= MAXIMUM_WAIT_OBJECTS then
    raise Exception.Create('Too many wait-objects!');

  // Update the low-level array with this new handle
  FObjs[FList.Count] := aSignal.Handle;

  // Add the thread event to the list
  FList.Add(aSignal);
end;

procedure TSignalList.TriggeredIndex(Index: integer);
begin
  // Use assertions to guarantee correct code while debugging and fast release code
  Assert((Index >= 0) and (Index < FList.Count));
  Assert(TObject(FList[Index]) is TCustomSignal);
  Assert(FObjs[Index] = TCustomSignal(FList[Index]).Handle);

  // Get the Signal associated with this index and trigger the event
  TCustomSignal(FList.List^[Index]).Trigger;
end;

function TSignalList.WaitOne(WaitTime: DWORD; var Index: integer): TWaitResult;
// We use the blocking function MsgWaitForMultipleObjects to wait for any
// message in the message queue or any signaled object from any of the
// other running threads in this process. See WINAPI32.HLP for details.
var
  WaitResult: DWORD;
begin
  // This call will block and use 0% CPU time until:
  // - A message arrives in the message queue, or
  // - Any of the object handles in the Objs array become signaled
  if IgnoreMessages
  then WaitResult :=    WaitForMultipleObjects(FList.Count, @FObjs, WaitForAll, WaitTime)
  else WaitResult := MsgWaitForMultipleObjects(FList.Count,  FObjs, WaitForAll, WaitTime, MsgWakeupMask);

  // Index is only valid when Result = wrSignaled
  Index := WaitResult - WAIT_OBJECT_0;

  // Convert from WAIT_ returncode to TWaitResult
  case WaitResult of
    WAIT_ABANDONED: Result := wrAbandoned;
    WAIT_TIMEOUT  : Result := wrTimeout;
    WAIT_FAILED   : Result := wrError;
    else
      if WaitResult = DWORD(WAIT_OBJECT_0 + FList.Count)
      then Result := wrMessage
      else Result := wrSignaled  // WAIT_OBJECT_0 .. WAIT_OBJECT_0+(FList.Count-1)
  end;
end;

function TSignalList.WaitOneAndTrigger(WaitTime: DWORD): TWaitResult;
var
  Index: integer;
begin
  Result := WaitOne(WaitTime, Index);
  if Result = wrSignaled then
    TriggeredIndex(Index);
end;

function TSignalList.WaitUntil(WaitTime: DWORD; WaitResultStop: TWaitResults): TWaitResult;
begin
  repeat
    Result := WaitOneAndTrigger(WaitTime);
  until (Result in WaitResultStop);
end;

"Now that we have a nice encapsulation of MsgWaitForMultipleObjects to work with, lets continue with adding the improved threading capability to the main thread of the application (and thus the VCL). As we discussed, this is accomplished by hooking the OnIdle event of TApplication. See the source code of the TMultiThreadedMainLoop class in the HVMultiThreadMain unit."

procedure TMultiThreadedMainLoop.AppIdle(Sender: TObject; var Done: boolean);
// Whenever the application becomes idle, i.e. there are no messages in the
// message queue, this procedure is entered.
begin
  // The default case for the old idle event
  // handler should be that it is done processing
  Done := true;

  // Call any old idle event handler
  // - this could be extended with an idle hook chain
  if Assigned(FOldAppIdle) then
    FOldAppIdle(Sender, Done);

  // WaitUntil handles all signaled objects for the main thread
  if Done then
    // If the old idle event handler is done,
    // wait until there is a message for us (blocking)
    FSignalList.WaitUntil(INFINITE, [wrMessage])
  else
    // If the old idle event handler is not done yet,
    // just check for signaled objects or messages (non-blocking)
    FSignalList.WaitUntil(0       , [wrMessage, wrTimeOut]);

  // Tell the timer-loop that we have actully been idle
  FHasBeenIdle := true;

  // Now return to the message loop in TApplication and
  // let it have a look at the message for us
  // Note that we will normally return with Done = true.
  // This will call WaitMessage in TApplication.Idle, but it will not
  // block because we already know there is a message in the message queue
end;

Continue to read the full article (PDF) and download the code.

Tuesday, February 12, 2008

TDM#5: Slimming the Fat off Your Apps

I think that the best TDM article title I've had is Slimming the Fat off Your Apps. It was published in November 1998 and we're still in the Delphi 4 era (but as always many people were still using the older versions, D2 and D3).

The article intro said:

"Hallvard Vassbotn wants to slim the fat off your software: tune in here if you want [your programs] to be leaner and fitter..."

The core points of the article are:

The Windows NT family of operating systems reduce the apparent memory footprint of an application when you minimize it
The code for compiler-generated initialization and finalization sections of units changed between D2 and D3 (to the better and worse)
The mechanism and code for invoking initialization and finalization sections changed markedly between D2 and D3
Initialization code is spread out through the EXE file - causing "heavy" memory usage after startup.
There is a trick to simulate the minimize-app memory "savings" in code (calling SetProcessWorkingSetSize)

In my opinion, the whole memory savings issue is a game of smoke and mirrors - no actual memory is saved, just the internal accounting and how pages are marked as belonging to an applications working set or not is affected. It may impress users looking at the mostly misleading memory usage in Task Manager, but it has no effect on the real memory pressure of the machine.

Here is the code (courtesy of Ray Nelson) for trimming the "fat":

unit TrimMem;
//
// Code courtesy of Roy Nelson (rnelson@inprise.com),
// Inprise European Professional Support
//
// Call TrimWorkingSet from you project file or from a package
// to reduce the memory overhead - note that this will only work
// on Windows NT.
//
// From Delphi Magazine article "Slimming the fat off your Apps"
// by Hallvard Vassbotn, hallvard@falcon.no
//
interface

procedure TrimWorkingSet;

implementation

uses
  Windows,
  SysUtils;

procedure TrimWorkingSet;
var
  MainHandle : THandle;
begin
 MainHandle := OpenProcess(PROCESS_ALL_ACCESS, false, GetCurrentProcessID);
 SetProcessWorkingSetSize(MainHandle,DWORD(-1),DWORD(-1));
 CloseHandle(MainHandle);
end;

end.

The full PDF article and code are available for download.

Wednesday, January 30, 2008

TDM#4: Delphi 4 Bugs and Fixes

Delphi 4, released in the summer of 1998, was one of the most notorious Delphi releases ever. The initial release contained a large number of serious bugs, and it later became clear that the release date had been pushed by management and/or marketing and not sanctioned by technical and R&D.

While all of this is water under the bridge, one very visible bug, the so-called TListBox ItemIndex bug, called for an interesting patching technique that has since been employed numerous times (by me and others) to fix RTL and VCL issues at runtime.

Dave Jewell and myself both contributed to the TDM article named Delphi 4 Bugs and Fixes, published in the September 1998 issue. I have Dave's permission to publish our joint effort here [Thanks, Dave!].

The gist of the problem was:

"Cause
With the release of Delphi4, Inprise changed the implementation of TCustomListBox’s ItemIndex property. The implementation of the access methods in Delphi 3 was [like this]:

function TCustomListBox.GetItemIndex: Integer;
begin
  Result := SendMessage(Handle, LB_GETCURSEL, 0, 0);
end;
 
function TCustomListBox.GetSelCount: Integer;
begin
  Result := SendMessage(Handle, LB_GETSELCOUNT, 0, 0);
end;

This was changed in Delphi 4 build 5.3 into [this]code:

function TCustomListBox.GetItemIndex: Integer;
begin
  if not MultiSelect then
    Result := SendMessage(Handle, LB_GETCARETINDEX, 0, 0) else
    Result := SendMessage(Handle, LB_GETCURSEL, 0, 0);
end;
 
procedure TCustomListBox.SetItemIndex(Value: Integer);
begin
  if GetItemIndex <> Value then
    if MultiSelect then
      SendMessage(Handle, LB_SETCARETINDEX, Value, 0) else
      SendMessage(Handle, LB_SETCURSEL, Value, 0);
end;

The change has an obvious bug, in TCustomListBox.GetItemIndex the if statement should have been:

  if MultiSelect then

instead of:

  if not MultiSelect

And indeed, that is how the method looks like in current Delphis. The solution to the problem was:

"a small unit that patches the code in TCustomListBox.GetItemIndex at runtime. It can handle both statically linked and packaged VCL.

unit FixupLB;
// Simple unit that patches the StdCtrls.TCustomListBox.GetItemIndex
// method in Delphi 4.0 (build 5.37)
//
// July 1998, by Hallvard Vassbotn (hallvard@falcon.no)
  
interface
  
implementation
  
uses
  Windows,
  TypInfo,
  StdCtrls;
  
type
  PPGetItemIndex = ^PGetItemIndex;
  PGetItemIndex = ^TGetItemIndex;
  TGetItemIndex = packed record
    PUSH_EBX      :  byte;
    MOV_EBX_EAX   : word;
    CMP_BYTE_PTR  : word;
    ADDR_Offset   : Cardinal;
    False_ZERO    : byte;
    JNZ           : byte;
    ELSE_REL_ADDR : byte;
    PUSH_LParam   : word;
    PUSH_WParam   : word;
    PUSH_Msg      : byte;
    Msg_Const     : Cardinal;
    MOV_EAX_EBX   : word;
    CALL          : byte;
  end;
  PGetItemIndexInPackage = ^TGetItemIndexInPackage;
  TGetItemIndexInPackage = packed record
    JMP_DWORD: word;
    ActualGetItemIndex : PPGetItemIndex;
  end;
  TBytes = array[0..3] of byte;
 
const
  OpCode_JNZ       = $75;
  OpCode_JZ        = $74;
  OpCode_JMP_DWORD = $25FF;
 
function IsBuggyCode(var GetItemIndex: PGetItemIndex): boolean;
var
  GetItemIndexInPackage: PGetItemIndexInPackage absolute GetItemIndex;
begin
  // Verify that this is a static method
  Result := (TBytes(GetItemIndex)[3] < $FE);
  if Result then
  begin
    // Handle the case when TCustomList.GetItemIndex is in a package
    if (GetItemIndexInPackage^.JMP_DWORD = OpCode_JMP_DWORD) then
      GetItemIndex := GetItemIndexInPackage^.ActualGetItemIndex^;
    with GetItemIndex^ do
    begin
      Result :=
        // The buggy instruction, should have been OpCode_JZ
        (JNZ = OpCode_JNZ)     and
        // Check the other instructions as well, just to be sure
        (PUSH_EBX     = $53)   and
        (MOV_EBX_EAX  = $D88B) and
        (CMP_BYTE_PTR = $BB80) and
        (False_ZERO   = $00)   and
        (ELSE_REL_ADDR= $18)   and
        (PUSH_LParam  = $006A) and
        (PUSH_WParam  = $006A) and
        (PUSH_Msg     = $68)   and
        (Msg_Const    = $19f)  and
        (MOV_EAX_EBX  = $C38B) and
        (CALL         = $E8)   ;
    end;
  end;
end;
 
procedure WriteCodeByte(CodeAddress: pointer; Value: byte);
var
  WrittenBytes: Cardinal;
begin
  // Must use WriteProcessMemory or VirtualProtect to write to code segment
  WriteProcessMemory(GetCurrentProcess, CodeAddress, @Value, SizeOf(Value), WrittenBytes);
end;
 
type
  // Publish ItemIndex to easily get address of GetItemIndex method
  TPublishedListBox = class(TCustomListBox)
  published
    property ItemIndex;
  end;
 
procedure Fix_TCustomListBox_GetItemIndex_Bug;
var
  PropInfo    : PPropInfo;
  GetItemIndex: PGetItemIndex;
begin
  // Get the property information for the newly published ItemIndex property
  PropInfo := TypInfo.GetPropInfo(TPublishedListBox.ClassInfo, 'ItemIndex');
  if Assigned(PropInfo) then
  begin
    // Get the get-property method address
    GetItemIndex := PropInfo^.GetProc;
    // Now check that the buggy code is there
    if IsBuggyCode(GetItemIndex) then
      // Patch in the correction, voilà!
      // Same as GetItemIndex^.JNZ := OpCode_JZ, without the AV...
      WriteCodeByte(@GetItemIndex^.JNZ, OpCode_JZ);
  end;
end;
 
initialization
  Fix_TCustomListBox_GetItemIndex_Bug;
end.

...

When you get hold of and install the Delphi 4 update, the fix unit will silently find out that the code in GetItemIndex has changed and cause no harm. However,at that time you should remove the fix unit from the project anyway: it is better not to have self-modifying code in your project if you can avoid it."

The full PDF article and code are available for download.

Tuesday, October 23, 2007

Sergey Antonov implements Yield for Delphi!

The Russian Delphi programmer Sergey Antonov (or Антонов Сергей - aka. 0xffff) is a real hacker in the positive sense. He approached me with some intriguing assembly code that implements the equivalent of the C# yield statement!

Yield makes it easier to implement enumerators (you know the simple classes or records with methods like GetCurrent and MoveNext that enables the for-in statement). Normally you have to implement a kind of state-machine to write an enumerator. With the yield statement this is turned around allowing you to express the iteration using easier to write loops (while, repeat-until or even a for-in loop).

Sergey has pulled the impressive feat of implementing a proof-of-concept version of a yield infrastructure and mechanics - without help from the compiler!! It may have some limitations, but it is most interesting anyway. Without further ado, here is Sergey's article and code. Make sure you also read the follow-up article on Sergey's blog.

Despite some minor language barriers ;), this will be a most interesting blog to follow!

Guest article, by Sergey AntonoV

"C# Yield implementation in Delphi.

The C# yield keyword is used to provide a value to the enumerator object or to signal the end of iteration. The main idea of yield construction is to generate a collection item on request and return it to the enumerator consumer immediately. You may find it useful in some cases.

As you know the Enumerator has two methods MoveNext and GetCurrent.

But how does yield works?

Technical details of the implementation

When I saw this construction I asked myself where is MoveNext and GetCurrent?

The GetEnumerator function returns the enumerator object or interface, but the enumerator is not explicitly constructed anywhere. So there must be some secret mechanism that makes it possible.

How does it really work? After spending some time in the debugger and the answer appeared.

In short the compiler generates a special type of object that of course

has some magic MoveNext and GetCurrent functions.

And because this construction may be useful to our Delphi community, I asked myself, what can I do to get yield support in Delphi with no special methods calling with saving the form of using like in С#.

I first wanted to retain the yield C# syntax, but later I changed the syntax a little and used a delegate implementation to an external procedure almost like in C# but with an additional parameter yield wrapper object. First time it was a virtual procedure.

But of course I have to generalize implementation for all types.

And of course I had an additional question to myself. Сould I improve on the С# yield implementation? Maybe.

I started from the programmer’s viewpoint. Something like this:

var
  number, exponent, counter, Res:integer;
begin
  // ...
  Res:=1;
  while counter<exponent do
  begin
    Res:=Res*number;
    Yield(Res);
    Inc(counter);
  end;
end;

I had to implement some class that implemented the magic MoveNext and GetCurrent functions.

And if you use local vars (that is placed on stack) I had to implement some mechanism that guarantees no memory leaks for finalized types and some mechanism that guarantees that I use

the valid local vars when the actual address of local vars has changed after last yield calling due to external reasons (e.g. enumerator passed as parameter to other procedure, so the location in stack becomes different).

So after each yield call I have to preserve the state of local vars and processor registers,

clean up the stack and return a value to the enumerator consumer.

And after next call to MoveNext I must allocate stack space, restore the state of local vars and processor registers, i.e. emulate that nothing has happened.

And of course I must provide a normal procedure for exiting at the end.

So let’s begin

First of all we declare some types:

type
  TYieldObject = class;
  TYieldProc = procedure (YieldObject: TYieldObject);

  TYieldObject = class
  protected
    IsYield:boolean;
    NextItemEntryPoint:pointer;
    BESP:pointer;
    REAX,REBX,RECX,REDX,RESI,REDI,REBP:pointer;
    StackFrameSize:DWORD;
    StackFrame: array[1..128] of DWORD;
    procedure SaveYieldedValue(const Value); virtual; abstract;
  public
    constructor Create(YieldProc: TYieldProc);
    function MoveNext:boolean;
    procedure Yield(const Value);
  end;

And the implementation

constructor TYieldObject.Create(YieldProc:TYieldProc);
asm
  mov eax.TYieldObject.NextItemEntryPoint,ecx;
  mov eax.TYieldObject.REAX,EAX;
end;

function TYieldObject.MoveNext: boolean;
asm
  { Save the value of following registers.
    We must preserve EBP, EBX, EDI, ESI, EAX for some circumstances.
    Because there is no guarantee that the state of registers will 
    be the same after an iteration }
  push ebp;
  push ebx;
  push edi;
  push esi;
  push eax;

  mov eax.TYieldObject.IsYield,0
  push offset @a1
  xor edx,edx;
  cmp eax.TYieldObject.BESP,edx;
  jz @AfterEBPAdjust;

  { Here is the correction of EBP. Some need of optimization still exists. }
  mov edx,esp;
  sub edx,eax.TYieldObject.BESP;
  add [eax.TYieldObject.REBP],edx
  @AfterEBPAdjust:
  mov eax.TYieldObject.BESP,esp;

  { Is there any local frame? }
  cmp eax.TYieldObject.StackFrameSize,0
  jz @JumpIn;

  { Restore the local stack frame }
  mov ecx,eax.TYieldObject.StackFrameSize;
  sub esp,ecx;
  mov edi,esp;
  lea esi,eax.TYieldObject.StackFrame;

  { Some need of optimization still exists. Like movsd}
  rep movsb;
  @JumpIn:

  { Restore the content of processor registers }
  mov ebx,eax.TYieldObject.REBX;
  mov ecx,eax.TYieldObject.RECX;
  mov edx,eax.TYieldObject.REDX;
  mov esi,eax.TYieldObject.RESI;
  mov edi,eax.TYieldObject.REDI;
  mov ebp,eax.TYieldObject.REBP;
  push [eax.TYieldObject.NextItemEntryPoint];
  mov eax,eax.TYieldObject.REAX;

  { Here is the jump to next iteration }
  ret;

  { And we return here after next iteration in all cases, except exception of course. }
  @a1:;

  { Restore the preserved EBP, EBX, EDI, ESI, EAX registers }
  pop eax;
  pop esi;
  pop edi;
  pop ebx;
  pop ebp;
  { This Flag indicates the occurrence or no occurrence of Yield  }
  mov al,eax.TYieldObject.IsYield;
end;

procedure TYieldObject.Yield(const Value);
asm
  { Preserve EBP, EAX,EBX,ECX,EDX,ESI,EDI }
  mov eax.TYieldObject.REBP,ebp;
  mov eax.TYieldObject.REAX,eax;
  mov eax.TYieldObject.REBX,ebx;
  mov eax.TYieldObject.RECX,ecx;
  mov eax.TYieldObject.REDX,edx;   // This is the Ref to const param
  mov eax.TYieldObject.RESI,ESI;
  mov eax.TYieldObject.REDI,EDI;
  pop ecx;
  mov eax.TYieldObject.NextItemEntryPoint,ecx;

  //We must do it first for valid const reference
  push eax;
  mov ecx,[eax];
  CALL  DWORD PTR [ecx+VMTOFFSET TYieldObject.SaveYieldedValue];
  pop eax;
  
  { Calculate the current local stack frame size }
  mov ecx,eax.TYieldObject.BESP;
  sub ecx,esp;
  mov eax.TYieldObject.StackFrameSize,ecx;
  jz @AfterSaveStack;

  { Preserve the local stack frame }
  lea esi,[esp];
  lea edi,[eax.TYieldObject.StackFrame];
  
  { Some need of optimization still exists. Like movsd }
  rep movsb;
  mov esp,eax.TYieldObject.BESP;
  @AfterSaveStack:

  {Set flag of Yield occurance }
  mov eax.TYieldObject.IsYield,1;
end;

And what about my improvements

As for improvements I am still thinking about unwinding the local SEH (Structured Exception Handling) frames on yielding and restore it with any needed correction after return.

And how do you use it?

type
  TYieldInteger = class(TYieldObject)
  protected
    Value:integer;
    function GetCurrent:integer;
    procedure SaveYieldedValue(const Value); override;
  public
    property Current:integer read GetCurrent;
  end;

{ TYieldInteger }

function TYieldInteger.GetCurrent: integer;
begin
  Result:=Value;
end;

procedure TYieldInteger.SaveYieldedValue(const Value);
begin
  Self.Value:=integer(Value);
end;

So now there is full support for integer.

type
  TYieldString = class(TYieldObject)
  protected
    Value:string;
    function GetCurrent:string;
    procedure SaveYieldedValue(const Value); override;
  public
    property Current:string read GetCurrent;
  end;

{ TYieldString }

function TYieldString.GetCurrent: string;
begin
  Result:=Value;
end;

procedure TYieldString.SaveYieldedValue(const Value);
begin
  Self.Value := string(Value);
end;

And now there is full support for string.

Sample of using a string Enumerator

procedure StringYieldProc(YieldObj: TYieldObject);
var  
  YieldValue: string;
  i: integer;
begin
  YieldValue:='None';
  YieldObj.Yield(YieldValue);
  for i := 1 to 10 do
  begin
    YieldValue := YieldValue + IntToStr(i);
    YieldObj.Yield(YieldValue);
  end;
end;

function TForm1.GetEnumerator: TYieldString;
begin
  Result:=TYieldString.Create(StringYieldProc);
end;

procedure TForm1.Button1Click(Sender: TObject);
var 
  a:string;
begin
  for a in self do 
    Memo1.Lines.Add(a);
end;

From Russia with love

Sergey Antonov aka oxffff (Russia, Ukhta)

References:

ECMA 334

ECMA 335

MSDN

Sergey's next article is here.

Hallvard's Blog

Sunday, March 30, 2008

TDM#10: BorDebug – Return of the Giant

Saturday, March 15, 2008

TDM#9: Exceptional Stack Tracing (HVEST)

"Hooking RaiseException

YAST Nostalgia

"To Stack Frame, or not to Stack Frame – that is the Question

"Dusting off the RTLI

Sunday, March 02, 2008

TDM#6: Knitting Your Own Threads

Tuesday, February 12, 2008

TDM#5: Slimming the Fat off Your Apps

Wednesday, January 30, 2008

TDM#4: Delphi 4 Bugs and Fixes

Tuesday, October 23, 2007

Sergey Antonov implements Yield for Delphi!

Guest article, by Sergey AntonoV

"C# Yield implementation in Delphi.

About Me

My Sites

Labels

Blog Archive

Blogs To Read

Syndication

Page Hits

What do you think of web polls?

DelphiFeeds.com