View Issue Details

ID | Project | Category | View Status | Last Update
0034344 | FPC | Packages | public | 2018-09-27 13:54
Reporter: Pascal Riekenberg | Assigned To: Michael Van Canneyt
Priority: normal | Severity: minor | Reproducibility: always
Status: closed | Resolution: fixed
Platform: i386 | OS: Windows 10 x64 | OS Version: 1803
Product Version: 3.3.1 | Product Build: 39823
Target Version: 3.2.0 | Fixed in Version: 3.3.1
Summary: 0034344: [Patch] Make TPascalScanner ignore UTF-8-BOM
Description: If you call TPascalScanner.OpenFile() for a file that starts with a UTF-8 BOM,
TPascalScanner.FetchToken throws an EScannerError: source.pas(1,1) Error: Invalid character '?'
Additional Information: The attached patch fixes this.
Tags: No tags attached.
Fixed in Revision: 39832
FPCOldBugId
FPCTarget
Attached Files
  • pscanner.pp.patch (647 bytes)
    Index: packages/fcl-passrc/src/pscanner.pp
    ===================================================================
    --- packages/fcl-passrc/src/pscanner.pp	(revision 39823)
    +++ packages/fcl-passrc/src/pscanner.pp	(working copy)
    @@ -4259,6 +4259,13 @@
         FCurLine := CurSourceFile.ReadLine;
         FTokenStr := PChar(CurLine);
         Result := true;
    +    if (FCurRow = 0) 
    +    and (Length(CurLine) >= 3)
    +    and (FTokenStr[0] = Char($EF))
    +    and (FTokenStr[1] = Char($BB))
    +    and (FTokenStr[2] = Char($BF)) then
    +      // ignore UTF-8 Byte Order Mark
    +      inc(FTokenStr, 3);
         Inc(FCurRow);
         inc(FModuleRow);
         FCurColumnOffset:=1;
    
    pscanner.pp.patch (647 bytes)
  • pscanner.pp.2.patch (1,794 bytes)
    Index: packages/fcl-passrc/src/pscanner.pp
    ===================================================================
    --- packages/fcl-passrc/src/pscanner.pp	(revision 39823)
    +++ packages/fcl-passrc/src/pscanner.pp	(working copy)
    @@ -4259,6 +4259,13 @@
         FCurLine := CurSourceFile.ReadLine;
         FTokenStr := PChar(CurLine);
         Result := true;
    +    if (FCurRow = 0)
    +    and (Length(CurLine) >= 3)
    +    and (FTokenStr[0] = #$EF)
    +    and (FTokenStr[1] = #$BB)
    +    and (FTokenStr[2] = #$BF) then
    +      // ignore UTF-8 Byte Order Mark
    +      inc(FTokenStr, 3);
         Inc(FCurRow);
         inc(FModuleRow);
         FCurColumnOffset:=1;
    Index: packages/fcl-passrc/tests/tcresolver.pas
    ===================================================================
    --- packages/fcl-passrc/tests/tcresolver.pas	(revision 39823)
    +++ packages/fcl-passrc/tests/tcresolver.pas	(working copy)
    @@ -916,9 +916,7 @@
     {$IFDEF CheckPasTreeRefCount}
     var El: TPasElement;
     {$ENDIF}
    -{$IF defined(VerbosePasResolver) or defined(VerbosePasResolverMem)}
     var i: Integer;
    -{$ENDIF}
     begin
       FResolverMsgs.Clear;
       FResolverGoodMsgs.Clear;
    Index: packages/fcl-passrc/tests/tcscanner.pas
    ===================================================================
    --- packages/fcl-passrc/tests/tcscanner.pas	(revision 39823)
    +++ packages/fcl-passrc/tests/tcscanner.pas	(working copy)
    @@ -244,6 +244,7 @@
         procedure TestIfError;
         Procedure TestModeSwitch;
         Procedure TestOperatorIdentifier;
    +    Procedure TestUTF8BOM;
       end;
     
     implementation
    @@ -1744,6 +1745,12 @@
       TestToken(tkidentifier,'operator',True);
     end;
     
    +procedure TTestScanner.TestUTF8BOM;
    +
    +begin
    +  DoTestToken(tkLineEnding,#$EF+#$BB+#$BF);
    +end;
    +
     initialization
       RegisterTests([TTestTokenFinder,TTestStreamLineReader,TTestScanner]);
     end.
    
    pscanner.pp.2.patch (1,794 bytes)

Activities

Pascal Riekenberg

2018-09-27 09:24

reporter  

pscanner.pp.patch (647 bytes)
Index: packages/fcl-passrc/src/pscanner.pp
===================================================================
--- packages/fcl-passrc/src/pscanner.pp	(revision 39823)
+++ packages/fcl-passrc/src/pscanner.pp	(working copy)
@@ -4259,6 +4259,13 @@
     FCurLine := CurSourceFile.ReadLine;
     FTokenStr := PChar(CurLine);
     Result := true;
+    if (FCurRow = 0) 
+    and (Length(CurLine) >= 3)
+    and (FTokenStr[0] = Char($EF))
+    and (FTokenStr[1] = Char($BB))
+    and (FTokenStr[2] = Char($BF)) then
+      // ignore UTF-8 Byte Order Mark
+      inc(FTokenStr, 3);
     Inc(FCurRow);
     inc(FModuleRow);
     FCurColumnOffset:=1;
pscanner.pp.patch (647 bytes)

Michael Van Canneyt

2018-09-27 09:30

administrator   ~0111048

Can you please provide a small test case for the scanner testsuite ?

Pascal Riekenberg

2018-09-27 11:32

reporter  

pscanner.pp.2.patch (1,794 bytes)
Index: packages/fcl-passrc/src/pscanner.pp
===================================================================
--- packages/fcl-passrc/src/pscanner.pp	(revision 39823)
+++ packages/fcl-passrc/src/pscanner.pp	(working copy)
@@ -4259,6 +4259,13 @@
     FCurLine := CurSourceFile.ReadLine;
     FTokenStr := PChar(CurLine);
     Result := true;
+    if (FCurRow = 0)
+    and (Length(CurLine) >= 3)
+    and (FTokenStr[0] = #$EF)
+    and (FTokenStr[1] = #$BB)
+    and (FTokenStr[2] = #$BF) then
+      // ignore UTF-8 Byte Order Mark
+      inc(FTokenStr, 3);
     Inc(FCurRow);
     inc(FModuleRow);
     FCurColumnOffset:=1;
Index: packages/fcl-passrc/tests/tcresolver.pas
===================================================================
--- packages/fcl-passrc/tests/tcresolver.pas	(revision 39823)
+++ packages/fcl-passrc/tests/tcresolver.pas	(working copy)
@@ -916,9 +916,7 @@
 {$IFDEF CheckPasTreeRefCount}
 var El: TPasElement;
 {$ENDIF}
-{$IF defined(VerbosePasResolver) or defined(VerbosePasResolverMem)}
 var i: Integer;
-{$ENDIF}
 begin
   FResolverMsgs.Clear;
   FResolverGoodMsgs.Clear;
Index: packages/fcl-passrc/tests/tcscanner.pas
===================================================================
--- packages/fcl-passrc/tests/tcscanner.pas	(revision 39823)
+++ packages/fcl-passrc/tests/tcscanner.pas	(working copy)
@@ -244,6 +244,7 @@
     procedure TestIfError;
     Procedure TestModeSwitch;
     Procedure TestOperatorIdentifier;
+    Procedure TestUTF8BOM;
   end;
 
 implementation
@@ -1744,6 +1745,12 @@
   TestToken(tkidentifier,'operator',True);
 end;
 
+procedure TTestScanner.TestUTF8BOM;
+
+begin
+  DoTestToken(tkLineEnding,#$EF+#$BB+#$BF);
+end;
+
 initialization
   RegisterTests([TTestTokenFinder,TTestStreamLineReader,TTestScanner]);
 end.
pscanner.pp.2.patch (1,794 bytes)

Pascal Riekenberg

2018-09-27 11:34

reporter   ~0111051

See attached patch.

I also corrected an error in tcresolver.pas:
The first use of the local variable i does not depend on any defines!

Michael Van Canneyt

2018-09-27 13:17

administrator   ~0111052

Applied, many thanks for the useful patch!

Pascal Riekenberg

2018-09-27 13:54

reporter   ~0111053

yw

Issue History

Date Modified Username Field Change
2018-09-27 09:24 Pascal Riekenberg New Issue
2018-09-27 09:24 Pascal Riekenberg File Added: pscanner.pp.patch
2018-09-27 09:29 Michael Van Canneyt Assigned To => Michael Van Canneyt
2018-09-27 09:29 Michael Van Canneyt Status new => assigned
2018-09-27 09:30 Michael Van Canneyt Note Added: 0111048
2018-09-27 09:30 Michael Van Canneyt Status assigned => feedback
2018-09-27 11:32 Pascal Riekenberg File Added: pscanner.pp.2.patch
2018-09-27 11:34 Pascal Riekenberg Note Added: 0111051
2018-09-27 11:34 Pascal Riekenberg Status feedback => assigned
2018-09-27 13:17 Michael Van Canneyt Fixed in Revision => 39832
2018-09-27 13:17 Michael Van Canneyt Note Added: 0111052
2018-09-27 13:17 Michael Van Canneyt Status assigned => resolved
2018-09-27 13:17 Michael Van Canneyt Fixed in Version => 3.3.1
2018-09-27 13:17 Michael Van Canneyt Resolution open => fixed
2018-09-27 13:17 Michael Van Canneyt Target Version => 3.2.0
2018-09-27 13:54 Pascal Riekenberg Note Added: 0111053
2018-09-27 13:54 Pascal Riekenberg Status resolved => closed