View Issue Details

ID: 0036087 | Project: Lazarus | Category: LazUtils | View Status: public | Last Update: 2019-10-03 20:26
Reporter: Alexey Tor. | Assigned To: Juha Manninen
Priority: normal | Severity: minor | Reproducibility: N/A
Status: resolved | Resolution: fixed
Product Version: | Product Build:
Target Version: | Fixed in Version:
Summary: 0036087: Small (incompatible) optimization of LConvEncoding
Description: This replaces the "case" statement with 4 choices (which is executed often, inside the "for" loop)
with simple code that takes 2 array parameters (instead of the old single integer parameter).
The declaration of DBCSToUTF8 in the "interface" section was removed; sorry, I don't see a use case for it.
Tags: No tags attached.
Fixed in Revision: r61963
LazTarget: -
Widgetset:
Attached Files
  • lc.diff (4,064 bytes)
    Index: components/lazutils/asiancodepagefunctions.inc
    ===================================================================
    --- components/lazutils/asiancodepagefunctions.inc	(revision 61901)
    +++ components/lazutils/asiancodepagefunctions.inc	(working copy)
    @@ -12,7 +12,7 @@
       The clipboard is able to work with the windows and gtk behaviour/features.
     }
     
    -function DBCSToUTF8(const s: string; CodeP: integer): string;
    +function DBCSToUTF8(const s: string; const ArrayUni, ArrayCP: array of word): string;
     var
       len, l: Integer;
       Src, Dest: PChar;
    @@ -19,11 +19,7 @@
       c: char;
       code: word;
     begin
    -  if s = '' then
    -  begin
    -    Result := s;
    -    exit;
    -  end;
    +  if s = '' then exit('');
       len := length(s);
       SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes
       Src  := PChar(s);
    @@ -45,19 +41,7 @@
           code := code + Byte(c);
           Inc(Src);
     
    -      case CodeP of
    -        936:
    -          code := Uni936C[SearchTable(CP936CC, code)];
    -        950:
    -          code := Uni950C[SearchTable(CP950CC, code)];
    -        949:
    -          code := Uni949C[SearchTable(CP949CC, code)];
    -        932:
    -          code := Uni932C[SearchTable(CP932CC, code)];
    -        else
    -          code := 0;
    -      end;
    -
    +      code := ArrayUni[SearchTable(ArrayCP, code)];
           if code>0 then
           begin
             l:=UnicodeToUTF8Inline(code,Dest);
    @@ -70,22 +54,22 @@
     
     function CP936ToUTF8(const s: string): string;
     begin
    -  Result := DBCSToUTF8(s, 936);
    +  Result := DBCSToUTF8(s, Uni936C, CP936CC);
     end;
     
     function CP950ToUTF8(const s: string): string;
     begin
    -  Result := DBCSToUTF8(s, 950);
    +  Result := DBCSToUTF8(s, Uni950C, CP950CC);
     end;
     
     function CP949ToUTF8(const s: string): string;
     begin
    -  Result := DBCSToUTF8(s, 949);
    +  Result := DBCSToUTF8(s, Uni949C, CP949CC);
     end;
     
     function CP932ToUTF8(const s: string): string;
     begin
    -  Result := DBCSToUTF8(s, 932);
    +  Result := DBCSToUTF8(s, Uni932C, CP932CC);
     end;
     
     {$IfNDef UseSystemCPConv}
    @@ -192,11 +176,7 @@
       c: char;
       Unicode: longword;
     begin
    -  if s = '' then
    -  begin
    -    Result := '';
    -    exit;
    -  end;
    +  if s = '' then exit('');
       len := length(s);
       SetLength(Result, len); // DBCS needs at most space as UTF-8
       Src  := PChar(s);
    Index: components/lazutils/lconvencoding.pas
    ===================================================================
    --- components/lazutils/lconvencoding.pas	(revision 61901)
    +++ components/lazutils/lconvencoding.pas	(working copy)
    @@ -191,8 +191,6 @@
     function CP949ToUTF8(const s: string): string;      // Korea
     function CP950ToUTF8(const s: string): string;      // Chinese Complex
     
    -function DBCSToUTF8(const s: string; CodeP: integer): string;
    -
     {$ifdef FPC_HAS_CPSTRING}
     function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
     function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    @@ -5345,10 +5343,7 @@
       p: PChar;
       c: Char;
     begin
    -  if s='' then begin
    -    Result:='';
    -    exit;
    -  end;
    +  if s='' then exit('');
       len:=length(s);
       SetLength(Result,len*4);// UTF-8 is at most 4 bytes
       Src:=PChar(s);
    @@ -7008,10 +7003,7 @@
       c: Char;
       Unicode: LongWord;
     begin
    -  if s='' then begin
    -    Result:='';
    -    exit;
    -  end;
    +  if s='' then exit('');
       len:=length(s);
       SetLength(Result,len);
       Src:=PChar(s);
    @@ -7050,10 +7042,7 @@
       Unicode: LongWord;
       CharLen: integer;
     begin
    -  if s='' then begin
    -    Result:='';
    -    exit;
    -  end;
    +  if s='' then exit('');
       len:=length(s);
       SetLength(Result,len*2);
       Src:=PChar(s);
    @@ -7090,10 +7079,7 @@
       Unicode: LongWord;
       CharLen: integer;
     begin
    -  if s='' then begin
    -    Result:='';
    -    exit;
    -  end;
    +  if s='' then exit('');
       len:=length(s);
       SetLength(Result,len*2);
       Src:=PChar(s);
    @@ -7218,10 +7204,7 @@
       i: LongInt;
     begin
       l:=length(s);
    -  if l=0 then begin
    -    Result:='';
    -    exit;
    -  end;
    +  if l=0 then exit('');
       p:=PChar(s);
     
       // try UTF-8 BOM (Byte Order Mark)
    
    lc.diff (4,064 bytes)

Activities

Alexey Tor.

2019-09-19 12:04

reporter  

lc.diff (4,064 bytes)
Index: components/lazutils/asiancodepagefunctions.inc
===================================================================
--- components/lazutils/asiancodepagefunctions.inc	(revision 61901)
+++ components/lazutils/asiancodepagefunctions.inc	(working copy)
@@ -12,7 +12,7 @@
   The clipboard is able to work with the windows and gtk behaviour/features.
 }
 
-function DBCSToUTF8(const s: string; CodeP: integer): string;
+function DBCSToUTF8(const s: string; const ArrayUni, ArrayCP: array of word): string;
 var
   len, l: Integer;
   Src, Dest: PChar;
@@ -19,11 +19,7 @@
   c: char;
   code: word;
 begin
-  if s = '' then
-  begin
-    Result := s;
-    exit;
-  end;
+  if s = '' then exit('');
   len := length(s);
   SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes
   Src  := PChar(s);
@@ -45,19 +41,7 @@
       code := code + Byte(c);
       Inc(Src);
 
-      case CodeP of
-        936:
-          code := Uni936C[SearchTable(CP936CC, code)];
-        950:
-          code := Uni950C[SearchTable(CP950CC, code)];
-        949:
-          code := Uni949C[SearchTable(CP949CC, code)];
-        932:
-          code := Uni932C[SearchTable(CP932CC, code)];
-        else
-          code := 0;
-      end;
-
+      code := ArrayUni[SearchTable(ArrayCP, code)];
       if code>0 then
       begin
         l:=UnicodeToUTF8Inline(code,Dest);
@@ -70,22 +54,22 @@
 
 function CP936ToUTF8(const s: string): string;
 begin
-  Result := DBCSToUTF8(s, 936);
+  Result := DBCSToUTF8(s, Uni936C, CP936CC);
 end;
 
 function CP950ToUTF8(const s: string): string;
 begin
-  Result := DBCSToUTF8(s, 950);
+  Result := DBCSToUTF8(s, Uni950C, CP950CC);
 end;
 
 function CP949ToUTF8(const s: string): string;
 begin
-  Result := DBCSToUTF8(s, 949);
+  Result := DBCSToUTF8(s, Uni949C, CP949CC);
 end;
 
 function CP932ToUTF8(const s: string): string;
 begin
-  Result := DBCSToUTF8(s, 932);
+  Result := DBCSToUTF8(s, Uni932C, CP932CC);
 end;
 
 {$IfNDef UseSystemCPConv}
@@ -192,11 +176,7 @@
   c: char;
   Unicode: longword;
 begin
-  if s = '' then
-  begin
-    Result := '';
-    exit;
-  end;
+  if s = '' then exit('');
   len := length(s);
   SetLength(Result, len); // DBCS needs at most space as UTF-8
   Src  := PChar(s);
Index: components/lazutils/lconvencoding.pas
===================================================================
--- components/lazutils/lconvencoding.pas	(revision 61901)
+++ components/lazutils/lconvencoding.pas	(working copy)
@@ -191,8 +191,6 @@
 function CP949ToUTF8(const s: string): string;      // Korea
 function CP950ToUTF8(const s: string): string;      // Chinese Complex
 
-function DBCSToUTF8(const s: string; CodeP: integer): string;
-
 {$ifdef FPC_HAS_CPSTRING}
 function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
 function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
@@ -5345,10 +5343,7 @@
   p: PChar;
   c: Char;
 begin
-  if s='' then begin
-    Result:='';
-    exit;
-  end;
+  if s='' then exit('');
   len:=length(s);
   SetLength(Result,len*4);// UTF-8 is at most 4 bytes
   Src:=PChar(s);
@@ -7008,10 +7003,7 @@
   c: Char;
   Unicode: LongWord;
 begin
-  if s='' then begin
-    Result:='';
-    exit;
-  end;
+  if s='' then exit('');
   len:=length(s);
   SetLength(Result,len);
   Src:=PChar(s);
@@ -7050,10 +7042,7 @@
   Unicode: LongWord;
   CharLen: integer;
 begin
-  if s='' then begin
-    Result:='';
-    exit;
-  end;
+  if s='' then exit('');
   len:=length(s);
   SetLength(Result,len*2);
   Src:=PChar(s);
@@ -7090,10 +7079,7 @@
   Unicode: LongWord;
   CharLen: integer;
 begin
-  if s='' then begin
-    Result:='';
-    exit;
-  end;
+  if s='' then exit('');
   len:=length(s);
   SetLength(Result,len*2);
   Src:=PChar(s);
@@ -7218,10 +7204,7 @@
   i: LongInt;
 begin
   l:=length(s);
-  if l=0 then begin
-    Result:='';
-    exit;
-  end;
+  if l=0 then exit('');
   p:=PChar(s);
 
   // try UTF-8 BOM (Byte Order Mark)
lc.diff (4,064 bytes)

Juha Manninen

2019-10-03 20:26

developer   ~0118293

I think it is OK.
Applied, thanks.

Issue History

Date Modified Username Field Change
2019-09-19 12:04 Alexey Tor. New Issue
2019-09-19 12:04 Alexey Tor. File Added: lc.diff
2019-10-03 20:19 Juha Manninen Assigned To => Juha Manninen
2019-10-03 20:19 Juha Manninen Status new => assigned
2019-10-03 20:26 Juha Manninen Status assigned => resolved
2019-10-03 20:26 Juha Manninen Resolution open => fixed
2019-10-03 20:26 Juha Manninen Fixed in Revision => r61963
2019-10-03 20:26 Juha Manninen LazTarget => -
2019-10-03 20:26 Juha Manninen Note Added: 0118293