Dictionary files format

All announcements, questions and issues related to the TsiLang Components Suite.
Post Reply
CPS
Posts: 19
Joined: Fri Sep 17, 2021 1:48 pm

Dictionary files format

Post by CPS »

I like the SiLang system and extensively use that, but the existing Dictionary Manager lacks many functions and is inconvenient.
In particular, it does not support CTL-F combination for the Find function and the latter does not have an exact search option,
so I must 8-10 times click Next to find an item.

I'd like to create my own Dictionary Manager but couldn't find a *.dic files format description.
It seems the format is pretty straightforward. Where I can find that?
Could you publish the format?
isiticov
Site Admin
Posts: 2385
Joined: Thu Nov 21, 2002 3:17 pm

Re: Dictionary files format

Post by isiticov »

Hello,

To use Find/Replace feature in the Dictionary Manager there is CTRL+H shortcut used.
.PAS file with DIC format object is below. Hope this helps.

Code: Select all

//## Module: SIDICT
//## Source file: sidict.pas
//## Version: 1.4
//## Modification date: 27/04/00 7:39:15 AM
//## Created by Igor Siticov
//## Copyright © 2000, Igor Siticov, SiComponents
//## All rights reserved.
//##
//##
//## ------End of header
unit sidict;

interface
uses SysUtils, Classes, Windows, Graphics;

type

  PLangInfo = ^TLangInfo;
  TLangInfo = packed record
    Length: LongWord;
    Name: array[0..0] of WideChar;
  end;

  PDictionaryHeader = ^TDictionaryHeader;
  TDictionaryHeader = packed record
    HeaderSize: LongWord;
    NumOfLangs: LongWord;
    //    LangsInfo: array[0..0] of TLangInfo;
  end;

  PStrEntry = ^TStrEntry;
  TStrEntry = packed record
    Length: LongWord;
    Content: array[0..0] of WideChar;
  end;

  TLangArray = array[0..0] of TStringList;
  PLangArray = ^TLangArray;

  TDictionary = class(TObject)
  private
    FContentRead: Boolean;
    FCount: Integer;
    FLanguages: TStrings;
    FIsOld: Boolean;
    function GetItemCount: Integer;
    function GetLanguage(Index: Integer): TStringList;
    function GetLanguageCount: Integer;
  protected
    FHeader: TDictionaryHeader;
    FDataStream: TMemoryStream;
    PLangArr: PLangArray;
    procedure ReadContent;
    procedure WriteContent(SaveOld: Boolean);
    function FindStrInList(List: TStringList; AStr: String): Integer;
  public
    FileName: TFileName;
    constructor Create(AFilename: TFileName); overload;
    constructor Create(const AFilename: string; Dummy: Byte); overload;
    destructor Destroy; override;
    procedure Save(SaveOld: Boolean); virtual;
    property Languages: TStrings read FLanguages;
    property LanguageCount: Integer read GetLanguageCount;
    property ItemCount: Integer read GetItemCount;
    property IsOld: Boolean read FIsOld;
    function Item(Language, Index: Integer): String; virtual;
    property Language[Index: Integer]: TStringList read GetLanguage;
    procedure AddLanguage(Lang: String); virtual;
    procedure AddItem(Content: array of String); virtual;
    procedure Clear;
    function IsStrIncluded(AStr: String): Boolean; virtual;
    function GetTranslation(Lang1, Lang2: String; AStr: String):
      String; virtual;
    procedure ReadHeader(const AFileName: string);
    class function GetLanguages(const FileName: string): TStrings;
  end;

function CheckDicFile(const FileName: string): Boolean;

function StripFixedPrefix(const AName: string): string;
function IsFixedName(const AName: string): Boolean;

const
  SFixedLanguagePrefix = '!!!Fixed!!!';

implementation

uses
  _deffonts;

function TranslateCharsetInfoEx(lpSrc: PDWORD; var lpCs: TCharsetInfo; dwFlags:
  DWORD): Bool; stdcall;
  external 'gdi32.dll' Name 'TranslateCharsetInfo';

type
  EWrongLangName = class(Exception)
  end;
  ENotTranslated = class(Exception)
  end;
  ENewerVersion = class(Exception)
  end;
resourcestring
  SWrongLangName = 'Wrong language name!';
  SNotTranslated = 'There is no matching string in repository!';
  SNewerVersion = 'File has a newer format, cannot open';

const
  VerNo: LongWord = 1;

var
  DefFontsList: TDefFontsList = nil;

function StripFixedPrefix(const AName: string): string;
begin
  if Pos(SFixedLanguagePrefix, AName) = 1 then
    Result := Copy(AName, Length(SFixedLanguagePrefix) + 1, MaxInt)
  else
    Result := AName;
end;

function IsFixedName(const AName: string): Boolean;
begin
  Result := Pos(SFixedLanguagePrefix, AName) = 1;
end;

function StringToWideStringEx(const S: AnsiString; CodePage: Word): String;
var
  InputLength, OutputLength: Integer;
begin
  InputLength := Length(S);
  OutputLength := MultiByteToWideChar(CodePage, MB_PRECOMPOSED, PAnsiChar(S), InputLength, nil, 0);
  SetLength(Result, OutputLength);
  MultiByteToWideChar(CodePage, MB_PRECOMPOSED, PAnsiChar(S), InputLength, PWideChar(Result), OutputLength);
end;

function StringToWideStringEx2(const S: AnsiString; CharSet: TFontCharset): String;
var
  CSI: TCharsetInfo;
begin
  if TranslateCharsetInfoEx(Pointer(CharSet), CSI, TCI_SRCCHARSET) then
    Result := StringToWideStringEx(S, CSI.ciACP)
  else
    Result := String(S);
end;

function WideStringToStringEx(const WS: String; CodePage: Word): AnsiString;
var
  InputLength, OutputLength: Integer;
begin
  InputLength := Length(WS);
  OutputLength := WideCharToMultiByte(CodePage, 0, PWideChar(WS), InputLength, nil, 0, nil, nil);
  SetLength(Result, OutputLength);
  WideCharToMultiByte(CodePage, 0, PWideChar(WS), InputLength, PAnsiChar(Result), OutputLength, nil, nil);
end;

function WideStringToStringEx2(const WS: String; Charset: TFontCharset): AnsiString;
var
  CSI: TCharsetInfo;
begin
  if TranslateCharsetInfoEx(Pointer(Charset), CSI, TCI_SRCCHARSET) then
    Result := WideStringToStringEx(WS, CSI.ciACP)
  else
    Result := AnsiString(WS);
end;

function GetFontCharset(Language: string): TFontCharset;
begin
  if DefFontsList = nil then
    DefFontsList := TDefFontsList.Create;
  with DefFontsList do
    Result := GetLangCharset(Language);
end;

{ TDictionary }

procedure TDictionary.AddItem(Content: array of String);
var
  I: Integer;
begin
  for I := 0 to LanguageCount - 1 do
    if I <= High(Content) then
      PLangArr^[I].Add(Content[I]);
end;

procedure TDictionary.AddLanguage(Lang: String);
var
  I: Integer;
begin
  if FindStrInList(TStringList(FLanguages), Lang) >= 0 then
    Exit;
  FLanguages.Add(Lang);
  ReallocMem(PLangArr, LanguageCount * SizeOf(TStringList));
  PLangArr^[LanguageCount - 1] := TStringList.Create;
  if LanguageCount > 1 then
    for I := 0 to PLangArr^[LanguageCount - 2].Count - 1 do
      PLangArr^[LanguageCount - 1].Add('');
end;

procedure TDictionary.Clear;
var
  I: Integer;
begin
  for I := 0 to LanguageCount - 1 do
  begin
    PLangArr^[I].Clear;
    PLangArr^[I].Free;
  end;
  FLanguages.Clear;
end;

constructor TDictionary.Create(AFilename: TFileName);
var
  FStr: TFileStream;
begin
  FIsOld := False;
  FLanguages := TStringList.Create;
  FDataStream := TMemoryStream.Create;
  FileName := AFilename;
  if FileExists(AFilename) then
  begin
    FStr := TFileStream.Create(AFilename, fmOpenRead);
    try
      FDataStream.LoadFromStream(FStr);
      FDataStream.Position := 0;
    finally
      FStr.Free;
    end;
    ReadContent;
    FContentRead := True;
  end;
end;

constructor TDictionary.Create(const AFilename: string; Dummy: Byte);
begin
  inherited Create;
  FIsOld := False;
  FLanguages := TStringList.Create;
  FDataStream := TMemoryStream.Create;
  FileName := AFilename;
  FContentRead := False;
  ReadHeader(Filename);
end;

destructor TDictionary.Destroy;
var
  I: Integer;
begin
  if FContentRead then
    if PLangArr <> nil then
    begin
      for I := 0 to LanguageCount - 1 do
        PLangArr^[I].Free;
      FreeMem(PLangArr);
    end;
  FLanguages.Free;
  FDataStream.Free;
  inherited;
end;

function TDictionary.FindStrInList(List: TStringList; AStr: String):
  Integer;
var
  I: Integer;
begin
  Result := -1;
  for I := 0 to List.Count - 1 do
    if WideUpperCase(List[I]) = WideUpperCase(AStr) then
    begin
      Result := I;
      Break;
    end;
end;

function TDictionary.GetItemCount: Integer;
begin
  Result := 0;
  if Assigned(PLangArr) and Assigned(PLangArr^[0]) then
    Result := PLangArr^[0].Count
  else if FCount > 0 then
    Result := FCount;
end;

function TDictionary.GetLanguage(Index: Integer): TStringList;
begin
  Result := PLangArr^[Index];
end;

function TDictionary.GetLanguageCount: Integer;
begin
  Result := FLanguages.Count;
end;

class function TDictionary.GetLanguages(const FileName: string): TStrings;
var
  I: Integer;
  Len: LongWord;
  LangName: String;
  FStr: TFileStream;
  DICHeader: TDictionaryHeader;
begin
  Result := TStringList.Create;
  if not FileExists(FileName) then
    Exit;
  FStr := TFileStream.Create(FileName, fmOpenRead);
  try
    with FStr do
    begin
      if Size = 0 then
        Exit;
      // see if it's the new format
      ReadBuffer(Len, SizeOf(Len));
      if Len = 0 then // yes, it is
      begin
        ReadBuffer(Len, SizeOf(Len));
        if Len > VerNo then
          raise ENewerVersion.Create(SNewerVersion);
        Seek(2 * SizeOf(LongWord), soFromCurrent);
      end
      else // no, then rewind
      begin
        Seek(0, soFromBeginning);
      end;
      // reading the dictionary header...
      ReadBuffer(DICHeader, SizeOf(DICHeader));
      // Reading header fields...
      for I := 1 to DICHeader.NumOfLangs do
      begin
        ReadBuffer(Len, SizeOf(LongWord));
        SetLength(LangName, Len div 2);
        ReadBuffer(LangName[1], Len);
        Result.Add(LangName);
      end;
    end;
  finally
    FStr.Free;
  end;
end;

function TDictionary.GetTranslation(Lang1, Lang2,
  AStr: String): String;
var
  I, J, k: Integer;
begin
  Result := '';
  I := FindStrInList(TStringList(FLanguages), Lang1);
  J := FindStrInList(TStringList(FLanguages), Lang2);
  if (I < 0) or (J < 0) then
    raise EWrongLangName.Create(SWrongLangName);
  k := FindStrInList(PLangArr^[I], AStr);
  if (k < 0) then
    raise ENotTranslated.Create(SNotTranslated);
  Result := PLangArr^[J].Strings[k];
end;

function TDictionary.IsStrIncluded(AStr: String): Boolean;
var
  I: Integer;
begin
  Result := False;
  for I := 0 to LanguageCount - 1 do
    if FindStrInList(PLangArr^[I], AStr) >= 0 then
    begin
      Result := True;
      Break;
    end;
end;

function TDictionary.Item(Language, Index: Integer): String;
begin
  Result := PLangArr^[Language].Strings[Index];
end;

procedure TDictionary.ReadContent;
var
  I, J, K: Integer;
  Len, Count: LongWord;
  LangName, S: String;
  so: AnsiString;
  CSI: TCharsetInfo;
begin
  if FDataStream.Size > 0 then
  begin
    with FDataStream do
    begin
      // see if it's the new format
      ReadBuffer(Len, SizeOf(Len));
      if Len = 0 then // yes, it is
      begin
        ReadBuffer(Len, SizeOf(Len));
        if Len > VerNo then
          raise ENewerVersion.Create(SNewerVersion);
        Seek(2 * SizeOf(LongWord), soFromCurrent);
      end
      else // no, then rewind
      begin
        FIsOld := True;
        Seek(0, soFromBeginning);
      end;

      // reading the dictionary header...
      ReadBuffer(FHeader, SizeOf(FHeader));
      // creating the array of language info...
      if FHeader.NumOfLangs > 0 then
      begin
        GetMem(PLangArr, SizeOf(TStringList) * FHeader.NumOfLangs);
        for I := 0 to FHeader.NumOfLangs - 1 do
          PLangArr^[I] := TStringList.Create;
      end;
      // Reading header fields...
      for I := 1 to FHeader.NumOfLangs do
      begin
        ReadBuffer(Len, SizeOf(LongWord));
        SetLength(LangName, Len div 2);
        ReadBuffer(LangName[1], Len);
        FLanguages.Add(LangName);
      end;
      ReadBuffer(Count, SizeOf(LongWord));
      // reading the content of languages...
      for I := 1 to Count do
      begin
        for J := 1 to FHeader.NumOfLangs do
        begin
          ReadBuffer(Len, SizeOf(LongWord));
          if FIsOld then
          begin
            SetLength(S, Len div 2);
            ReadBuffer(S[1], Len);
            SetLength(so, Len);
            Move(S[1], so[1], Len);
            K := 1;
            while K < Length(so) do
            begin
              if so[K] = #0 then
                Delete(so, K, 1)
              else
                Inc(K);
            end;
            //so := S;
            if Win32Platform = VER_PLATFORM_WIN32_NT then
              if TranslateCharsetInfoEx(Pointer(GetFontCharset(FLanguages[J - 1])), CSI, TCI_SRCCHARSET) then
              begin
                S := StringToWideStringEx(so, CSI.ciACP)
              end
              else
                S := String(so)
            else
              S := String(so);
          end
          else
          begin
            SetLength(S, Len div 2);
            ReadBuffer(S[1], Len);
          end;
          if Length(S) > 0 then
            while (Length(S) > 0) and (S[Length(S)] = #0) do
              // a bug in prev. vers. caused #0 to be appended
              SetLength(S, Length(S) - 1);
          PLangArr^[J - 1].Add(S);
        end;
      end;
    end;
  end;
end;

procedure TDictionary.ReadHeader(const AFileName: string);
var
  I: Integer;
  Len: LongWord;
  LangName: String;
  FStr: TFileStream;
begin
  FCount := 0;
  if not FileExists(AFileName) then
    Exit;
  FStr := TFileStream.Create(AFileName, fmOpenRead);
  try
    FDataStream.LoadFromStream(FStr);
    FDataStream.Position := 0;
  finally
    FStr.Free;
  end;
  if FDataStream.Size = 0 then
    Exit;
  with FDataStream do
  begin
    // see if it's the new format
    ReadBuffer(Len, SizeOf(Len));
    if Len = 0 then // yes, it is
    begin
      ReadBuffer(Len, SizeOf(Len));
      if Len > VerNo then
        raise ENewerVersion.Create(SNewerVersion);
      Seek(2 * SizeOf(LongWord), soFromCurrent);
    end
    else // no, then rewind
    begin
      FIsOld := True;
      Seek(0, soFromBeginning);
    end;
    // reading the dictionary header...
    ReadBuffer(FHeader, SizeOf(FHeader));
    // Reading header fields...
    for I := 1 to FHeader.NumOfLangs do
    begin
      ReadBuffer(Len, SizeOf(LongWord));
      SetLength(LangName, Len div 2);
      ReadBuffer(LangName[1], Len);
      FLanguages.Add(LangName);
    end;
    ReadBuffer(FCount, SizeOf(LongWord));
  end;
end;

procedure TDictionary.Save(SaveOld: Boolean);
begin
  WriteContent(SaveOld);
end;

procedure TDictionary.WriteContent(SaveOld: Boolean);
var
  I, J: Integer;
  dw: LongWord;
  wc: String;
begin
  if (Win32Platform = VER_PLATFORM_WIN32_NT) and not SaveOld then
    with TFileStream.Create(FileName, fmCreate) do
    try
      dw := 0; // new format marker
      WriteBuffer(dw, SizeOf(dw));
      WriteBuffer(VerNo, SizeOf(VerNo)); // format version

      dw := 0;
      WriteBuffer(dw, SizeOf(dw)); // reserved
      WriteBuffer(dw, SizeOf(dw)); // reserved

      dw := 0;
      WriteBuffer(dw, SizeOf(LongWord));
      dw := LanguageCount;
      WriteBuffer(dw, SizeOf(LongWord));
      for I := 1 to LanguageCount do
      begin
        wc := String(FLanguages[I - 1]);
        dw := Length(wc) * 2;
        WriteBuffer(dw, SizeOf(LongWord));
        WriteBuffer(wc[1], dw);
      end;
      I := Position;
      Position := 4 * SizeOf(dw);
      WriteBuffer(I, SizeOf(LongWord));
      Position := I;
      dw := ItemCount;
      WriteBuffer(dw, SizeOf(LongWord));
      for I := 0 to ItemCount - 1 do
      begin
        for J := 0 to LanguageCount - 1 do
        begin
          if PLangArr^[J].Count > I then
            wc := PLangArr^[J].Strings[I]
          else
            wc := '';
          dw := Length(wc) * 2;
          WriteBuffer(dw, SizeOf(LongWord));
          WriteBuffer(wc[1], dw);
        end;
      end;
    finally
      Free;
    end
  else
    with TFileStream.Create(FileName, fmCreate) do
    try
      dw := 0;
      WriteBuffer(dw, SizeOf(LongWord));
      dw := LanguageCount;
      WriteBuffer(dw, SizeOf(LongWord));
      for I := 1 to LanguageCount do
      begin
        wc := String(FLanguages[I - 1]);
        dw := Length(wc) * 2;
        WriteBuffer(dw, SizeOf(LongWord));
        WriteBuffer(wc[1], dw);
      end;
      I := Position;
      Position := 0;
      WriteBuffer(I, SizeOf(LongWord));
      Position := I;
      dw := ItemCount;
      WriteBuffer(dw, SizeOf(LongWord));
      for I := 0 to ItemCount - 1 do
      begin
        for J := 0 to LanguageCount - 1 do
        begin
          if PLangArr^[J].Count > I then
            if Win32Platform = VER_PLATFORM_WIN32_NT then
              wc := String(WideStringToStringEx2(PLangArr^[J].Strings[I], GetFontCharset(FLanguages[J])))
            else
              wc := String(PLangArr^[J].Strings[I])
          else
            wc := '';
          dw := Length(wc) * 2;
          WriteBuffer(dw, SizeOf(LongWord));
          WriteBuffer(wc[1], dw);
        end;
      end;
    finally
      Free;
    end;
end;

function CheckDicFile(const FileName: string): Boolean;
  function SiTrim(S: String): String;
  var
    I: Integer;
  begin
    Result := S;
    for I := Length(Result) downto 1 do
      if Result[I] < #32 then
        Delete(Result, 1, 1);
  end;
var
  dw: LongWord;
  WS: String;
  FHeader: TDictionaryHeader;
  I: Integer;
begin
  if SameText(ExtractFileExt(FileName), '.dix') then
  begin
    Result := True;
    Exit;
  end
  else
    Result := False;
  with TFileStream.Create(FileName, fmOpenRead) do
  try
    // see if it's the new format
    ReadBuffer(dw, SizeOf(dw));
    if dw = 0 then // yes, it is
    begin
      ReadBuffer(dw, SizeOf(dw));
      if dw > VerNo then
        Exit;
      Seek(2 * SizeOf(LongWord), soFromCurrent);
    end
    else // no, then rewind
    begin
      Seek(0, soFromBeginning);
    end;

    // reading the dictionary header...
    ReadBuffer(FHeader, SizeOf(FHeader));
    if FHeader.NumOfLangs = 0 then
      Exit;
    if FHeader.NumOfLangs > $FFFF then
      Exit;
    // Reading header fields...
    for I := 1 to FHeader.NumOfLangs do
    begin
      // length of language name
      ReadBuffer(DW, SizeOf(DWORD));
      if DW = 0 then
        Exit;
      if DW > $FFFF then
        Exit;
      // language name
      SetLength(WS, DW div 2);
      ReadBuffer(WS[1], DW);
      // delete non-printable chars
      WS := SiTrim(WS);
      if DW div 2 <> DWORD(Length(WS)) then
        Exit
    end;
    //      ReadBuffer(DW, SizeOf(DWORD));
    //      if DW = 0 then Exit;
    Result := True;
  finally
    Free;
  end;
end;

initialization

finalization
  if DefFontsList <> nil then
    DefFontsList.Free;

end.
Best regards,
Igor Siticov.
CPS
Posts: 19
Joined: Fri Sep 17, 2021 1:48 pm

Re: Dictionary files format

Post by CPS »

Excellent.
Your code is very clean and professionally made.
By using that I was able to create a working dictionary editor for one day.
Thank you much.
isiticov
Site Admin
Posts: 2385
Joined: Thu Nov 21, 2002 3:17 pm

Re: Dictionary files format

Post by isiticov »

CPS wrote: Fri Oct 06, 2023 6:55 pm does not have an exact search option,
so I must 8-10 times click Next to find an item.
Thank you for reporting this. We will add this in the next update.
Best regards,
Igor Siticov.
Post Reply