View Issue Details

IDProjectCategoryView StatusLast Update
0033686Patches-public2018-11-01 09:40
ReporterAlexey Tor.Assigned ToJuha Manninen 
PrioritynormalSeverityminorReproducibilityN/A
Status resolvedResolutionfixed 
PlatformLinux x64OSOS Version
Product Version1.9 (SVN)Product Build 
Target VersionFixed in Version 
Summary0033686: LConvEncoding: added ErrorMode for many UTF8To functions
Description: This patch adds an ErrorMode parameter to all UTF8To* functions,
except UTF8 to UCS2 and UTF8 to UTF8 with/without BOM.
These functions can hit a conversion error internally [the char code is -1],
which was silently ignored before. Now the behavior is selected by ErrorMode:
- ignore the error (as before)
- replace the unconvertible char with "?" (taken from the global variable EncodingReplacementChar)
- raise an exception (EConvertError)

This is needed for CudaText to check whether the user can save Unicode text in the current ANSI codepage.
Quickly tested with CP1251 conversion errors.
Tested both with FPC_HAS_CPSTRING defined and without it.
TagsNo tags attached.
Fixed in Revisionr59416
LazTarget-
Widgetset
Attached Files
  • c1.diff (43,370 bytes)
    Index: components/lazutils/asiancodepagefunctions.inc
    ===================================================================
    --- components/lazutils/asiancodepagefunctions.inc	(revision 57782)
    +++ components/lazutils/asiancodepagefunctions.inc	(working copy)
    @@ -132,7 +132,8 @@
     procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
       SetTargetCodePage: boolean;
       {$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}
    -  out TheResult: RawByteString); inline;
    +  out TheResult: RawByteString;
    +  ErrorMode: TEncodingErrorMode=eemIgnore); inline;
     begin
       {$ifdef UseSystemCPConv}
       TheResult:=s;
    @@ -140,54 +141,59 @@
       if not SetTargetCodePage then
         SetCodePage(TheResult, CP_ACP, False);
       {$else}
    -  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc);
    +  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc,ErrorMode);
       if SetTargetCodePage then
         SetCodePage(TheResult, TargetCodePage, False);
       {$endif}
     end;
     
    -function UTF8ToCP932(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP932(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result);
    +  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP936(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP936(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result);
    +  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP949(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP949(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result);
    +  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP950(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP950(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result);
    +  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result,ErrorMode);
     end;
     {$ELSE}
    -function UTF8ToCP932(const s: string): string;
    +function UTF8ToCP932(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP932);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP932, ErrorMode);
     end;
     
    -function UTF8ToCP936(const s: string): string;
    +function UTF8ToCP936(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP936);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP936, ErrorMode);
     end;
     
    -function UTF8ToCP949(const s: string): string;
    +function UTF8ToCP949(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP949);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP949, ErrorMode);
     end;
     
    -function UTF8ToCP950(const s: string): string;
    +function UTF8ToCP950(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP950);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP950, ErrorMode);
     end;
     {$ENDIF}
     
    -function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
    +function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;
     var
       len:  integer;
       Src:  PChar;
    @@ -232,6 +238,19 @@
             else
               Dest^ := chr(i);
             Inc(Dest);
    +      end
    +      else
    +      case ErrorMode of
    +        eemReplace:
    +          begin
    +            Dest^ := EncodingReplacementChar;
    +            Inc(Dest);
    +          end;
    +        eemException:
    +          begin
    +            raise EConvertError.Create(
    +              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
    +          end;
           end;
         end;
       until false;
    Index: components/lazutils/lconvencoding.pas
    ===================================================================
    --- components/lazutils/lconvencoding.pas	(revision 57782)
    +++ components/lazutils/lconvencoding.pas	(working copy)
    @@ -75,14 +75,26 @@
       UTF32BEBOM = #0#0#$FE#$FF;
       UTF32LEBOM = #$FE#$FF#0#0;
     
    +type
    +  TEncodingErrorMode = (
    +    eemIgnore,
    +    eemReplace,
    +    eemException
    +    );
    +
    +var
    +  EncodingReplacementChar: char = '?';
    +
     function GuessEncoding(const s: string): string;
     
     function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string;
     // For UTF8 use the above functions, they save you one parameter
     function ConvertEncoding(const s, FromEncoding, ToEncoding: string
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     
     // This routine should obtain the encoding utilized by ansistring in the RTL
     function GetDefaultTextEncoding: string;
    @@ -95,9 +107,12 @@
     type
       TConvertEncodingFunction = function(const s: string): string;
       {$ifdef FPC_HAS_CPSTRING}
    -  TConvertUTF8ToEncodingFunc = function(const s: string; SetTargetCodePage: boolean = false): RawByteString;
    +  TConvertUTF8ToEncodingFunc = function(const s: string;
    +    SetTargetCodePage: boolean = false;
    +    ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
       {$else}
    -  TConvertUTF8ToEncodingFunc = function(const s: string): string;
    +  TConvertUTF8ToEncodingFunc = function(const s: string;
    +    ErrorMode: TEncodingErrorMode=eemIgnore): string;
       {$endif}
       TCharToUTF8Table = array[char] of PChar;
       TUnicodeToCharID = function(Unicode: cardinal): integer;
    @@ -131,52 +146,95 @@
     
     function UTF8ToUTF8BOM(const s: string): string; // UTF8 with BOM
     {$ifdef FPC_HAS_CPSTRING}
    -function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
    -function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false): RawByteString; // eastern europe
    -function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Western European languages
    -function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
    -function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false): RawByteString; // cyrillic
    -function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false): RawByteString; // latin 1
    -function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false): RawByteString; // greek
    -function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false): RawByteString; // turkish
    -function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false): RawByteString; // hebrew
    -function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false): RawByteString; // arabic
    -function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false): RawByteString; // baltic
    -function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false): RawByteString; // vietnam
    -function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
    -function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS western europe
    -function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
    -function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS and Windows console's cyrillic
    -function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // thai
    -function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // russian cyrillic
    -function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // ukrainian cyrillic
    -function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // belarussian cyrillic
    -function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // Macintosh, alias Mac OS Roman
    +function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // central europe
    +function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // eastern europe
    +function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Western European languages
    +function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // central europe
    +function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // cyrillic
    +function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // latin 1
    +function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // greek
    +function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // turkish
    +function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // hebrew
    +function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // arabic
    +function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // baltic
    +function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // vietnam
    +function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS central europe
    +function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS western europe
    +function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS central europe
    +function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS and Windows console's cyrillic
    +function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // thai
    +function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // russian cyrillic
    +function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // ukrainian cyrillic
    +function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // belarussian cyrillic
    +function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // Macintosh, alias Mac OS Roman
     {$ELSE}
    -function UTF8ToISO_8859_1(const s: string): string; // central europe
    -function UTF8ToISO_8859_15(const s: string): string; // Western European languages
    -function UTF8ToISO_8859_2(const s: string): string; // eastern europe
    -function UTF8ToCP1250(const s: string): string; // central europe
    -function UTF8ToCP1251(const s: string): string; // cyrillic
    -function UTF8ToCP1252(const s: string): string; // latin 1
    -function UTF8ToCP1253(const s: string): string; // greek
    -function UTF8ToCP1254(const s: string): string; // turkish
    -function UTF8ToCP1255(const s: string): string; // hebrew
    -function UTF8ToCP1256(const s: string): string; // arabic
    -function UTF8ToCP1257(const s: string): string; // baltic
    -function UTF8ToCP1258(const s: string): string; // vietnam
    -function UTF8ToCP437(const s: string): string;  // DOS central europe
    -function UTF8ToCP850(const s: string): string;  // DOS western europe
    -function UTF8ToCP852(const s: string): string;  // DOS central europe
    -function UTF8ToCP866(const s: string): string;  // DOS and Windows console's cyrillic
    -function UTF8ToCP874(const s: string): string;  // thai
    -function UTF8ToKOI8(const s: string): string;  // russian cyrillic
    -function UTF8ToKOI8U(const s: string): string;  // ukrainian cyrillic
    -function UTF8ToKOI8RU(const s: string): string;  // belarussian cyrillic
    -function UTF8ToMacintosh(const s: string): string;  // Macintosh, alias Mac OS Roman
    +function UTF8ToISO_8859_1(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // central europe
    +function UTF8ToISO_8859_15(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // Western European languages
    +function UTF8ToISO_8859_2(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // eastern europe
    +function UTF8ToCP1250(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // central europe
    +function UTF8ToCP1251(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // cyrillic
    +function UTF8ToCP1252(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // latin 1
    +function UTF8ToCP1253(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // greek
    +function UTF8ToCP1254(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // turkish
    +function UTF8ToCP1255(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // hebrew
    +function UTF8ToCP1256(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // arabic
    +function UTF8ToCP1257(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // baltic
    +function UTF8ToCP1258(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string; // vietnam
    +function UTF8ToCP437(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS central europe
    +function UTF8ToCP850(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS western europe
    +function UTF8ToCP852(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS central europe
    +function UTF8ToCP866(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS and Windows console's cyrillic
    +function UTF8ToCP874(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // thai
    +function UTF8ToKOI8(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // russian cyrillic
    +function UTF8ToKOI8U(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // ukrainian cyrillic
    +function UTF8ToKOI8RU(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // belarussian cyrillic
    +function UTF8ToMacintosh(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // Macintosh, alias Mac OS Roman
     {$ENDIF}
     // custom conversion
    -function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
    +function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;
     
     function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM
     function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM
    @@ -191,18 +249,27 @@
     function DBCSToUTF8(const s: string; CodeP: integer): string;
     
     {$ifdef FPC_HAS_CPSTRING}
    -function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
    -function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    -function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Korea
    -function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese Complex
    +function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Japanese
    +function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    +function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Korea
    +function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Chinese Complex
     {$ELSE}
    -function UTF8ToCP932(const s: string): string;      // Japanese
    -function UTF8ToCP936(const s: string): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    -function UTF8ToCP949(const s: string): string;      // Korea
    -function UTF8ToCP950(const s: string): string;      // Chinese Complex
    +function UTF8ToCP932(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Japanese
    +function UTF8ToCP936(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    +function UTF8ToCP949(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Korea
    +function UTF8ToCP950(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Chinese Complex
     {$ENDIF}
     // Common function used by all UTF8ToXXX functions.
    -function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
    +function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;
     {$ENDIF}
     
     procedure GetSupportedEncodings(List: TStrings);
    @@ -6763,7 +6830,8 @@
     procedure InternalUTF8ToCP(const s: string; TargetCodePage: TSystemCodePage;
       SetTargetCodePage: boolean;
       const UTF8CharConvFunc: TUnicodeToCharID;
    -  out TheResult: RawByteString); inline;
    +  out TheResult: RawByteString;
    +  ErrorMode: TEncodingErrorMode=eemIgnore); inline;
     begin
       if not Assigned(UTF8CharConvFunc) then
       begin
    @@ -6772,234 +6840,235 @@
         if not SetTargetCodePage then
           SetCodePage(TheResult, CP_ACP, False);
       end else begin
    -    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc);
    +    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc,ErrorMode);
         if SetTargetCodePage then
           SetCodePage(TheResult, TargetCodePage, False);
       end;
     end;
     
    -function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result);
    +  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result);
    +  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result);
    +  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result);
    +  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #152 -> using table
    -  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result);
    +  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #128 -> using table
    -  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result);
    +  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result);
    +  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result);
    +  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result);
    +  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result);
    +  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result);
    +  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result);
    +  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result,ErrorMode);
     end;
     
    -function UTF8ToCP437(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP437(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result);
    +  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP850(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP850(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result);
    +  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP852(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP852(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result);
    +  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP866(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP866(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result);
    +  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP874(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP874(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result);
    +  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result,ErrorMode);
     end;
     
    -function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result);
    +  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result);
    +  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result);
    +  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
     begin
    -  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result);
    +  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result,ErrorMode);
     end;
     {$ELSE}
    -function UTF8ToISO_8859_1(const s: string): string;
    +function UTF8ToISO_8859_1(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_15(const s: string): string;
    +function UTF8ToISO_8859_15(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_2(const s: string): string;
    +function UTF8ToISO_8859_2(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2,ErrorMode);
     end;
     
    -function UTF8ToCP1250(const s: string): string;
    +function UTF8ToCP1250(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250,ErrorMode);
     end;
     
    -function UTF8ToCP1251(const s: string): string;
    +function UTF8ToCP1251(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251,ErrorMode);
     end;
     
    -function UTF8ToCP1252(const s: string): string;
    +function UTF8ToCP1252(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252,ErrorMode);
     end;
     
    -function UTF8ToCP1253(const s: string): string;
    +function UTF8ToCP1253(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253,ErrorMode);
     end;
     
    -function UTF8ToCP1254(const s: string): string;
    +function UTF8ToCP1254(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254,ErrorMode);
     end;
     
    -function UTF8ToCP1255(const s: string): string;
    +function UTF8ToCP1255(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255,ErrorMode);
     end;
     
    -function UTF8ToCP1256(const s: string): string;
    +function UTF8ToCP1256(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256,ErrorMode);
     end;
     
    -function UTF8ToCP1257(const s: string): string;
    +function UTF8ToCP1257(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257,ErrorMode);
     end;
     
    -function UTF8ToCP1258(const s: string): string;
    +function UTF8ToCP1258(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258,ErrorMode);
     end;
     
    -function UTF8ToCP437(const s: string): string;
    +function UTF8ToCP437(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP437);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP437,ErrorMode);
     end;
     
    -function UTF8ToCP850(const s: string): string;
    +function UTF8ToCP850(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP850);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP850,ErrorMode);
     end;
     
    -function UTF8ToCP852(const s: string): string;
    +function UTF8ToCP852(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP852);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP852,ErrorMode);
     end;
     
    -function UTF8ToCP866(const s: string): string;
    +function UTF8ToCP866(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP866);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP866,ErrorMode);
     end;
     
    -function UTF8ToCP874(const s: string): string;
    +function UTF8ToCP874(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP874);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP874,ErrorMode);
     end;
     
    -function UTF8ToKOI8(const s: string): string;
    +function UTF8ToKOI8(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8,ErrorMode);
     end;
     
    -function UTF8ToKOI8U(const s: string): string;
    +function UTF8ToKOI8U(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U,ErrorMode);
     end;
     
    -function UTF8ToKOI8RU(const s: string): string;
    +function UTF8ToKOI8RU(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU,ErrorMode);
     end;
     
    -function UTF8ToMacintosh(const s: string): string;
    +function UTF8ToMacintosh(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh,ErrorMode);
     end;
     {$ENDIF}
     
     function UTF8ToSingleByte(const s: string;
    -  const UTF8CharConvFunc: TUnicodeToCharID): string;
    +  const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemIgnore): string;
     var
       len: Integer;
       Src: PChar;
    @@ -7033,6 +7102,19 @@
           if i>=0 then begin
             Dest^:=chr(i);
             inc(Dest);
    +      end
    +      else
    +      case ErrorMode of
    +        eemReplace:
    +          begin
    +            Dest^:=EncodingReplacementChar;
    +            inc(Dest);
    +          end;
    +        eemException:
    +          begin;
    +            raise EConvertError.Create(
    +              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
    +          end;
           end;
         end;
       end;
    @@ -7286,7 +7368,8 @@
     
     
     function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     var
       ATo: string;
     
    @@ -7304,36 +7387,36 @@
       ATo:=NormalizeEncoding(ToEncoding);
     
       if ATo=EncodingUTF8BOM then begin Result:=UTF8ToUTF8BOM(s); exit; end;
    -  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    +  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
       {$IFnDEF DisableAsianCodePages}
    -  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    +  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
       {$ENDIF}
    -  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    +  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
       if ATo=EncodingUCS2LE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2LE(s); exit; end;
       if ATo=EncodingUCS2BE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2BE(s); exit; end;
     
       if (ATo=GetDefaultTextEncoding) and Assigned(ConvertUTF8ToAnsi) then begin
    -    Result:=ConvertUTF8ToAnsi(s);
    +    Result:=ConvertUTF8ToAnsi(s{$ifdef FPC_HAS_CPSTRING},false{$endif},ErrorMode);
         exit;
       end;
     
    @@ -7386,7 +7469,8 @@
     end;
     
     function ConvertEncoding(const s, FromEncoding, ToEncoding: string
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
     var
       AFrom, ATo, SysEnc : String;
       Encoded : Boolean;
    @@ -7414,7 +7498,7 @@
       //DebugLn(['ConvertEncoding ',AFrom,' ',ATo]);
     
       if AFrom=EncodingUTF8 then begin
    -    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
    +    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
         if Encoded then exit;
       end
       else
    @@ -7426,7 +7510,7 @@
       begin
         Result:=ConvertEncodingToUTF8(s, AFrom, Encoded);
         if Encoded then
    -      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
    +      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
         if Encoded then exit;
       end;
     
    
    c1.diff (43,370 bytes)
  • c2.diff (44,083 bytes)
    Index: components/lazutils/asiancodepagefunctions.inc
    ===================================================================
    --- components/lazutils/asiancodepagefunctions.inc	(revision 57941)
    +++ components/lazutils/asiancodepagefunctions.inc	(working copy)
    @@ -132,7 +132,8 @@
     procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
       SetTargetCodePage: boolean;
       {$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}
    -  out TheResult: RawByteString); inline;
    +  out TheResult: RawByteString;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue); inline;
     begin
       {$ifdef UseSystemCPConv}
       TheResult:=s;
    @@ -140,54 +141,59 @@
       if not SetTargetCodePage then
         SetCodePage(TheResult, CP_ACP, False);
       {$else}
    -  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc);
    +  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc,ErrorMode);
       if SetTargetCodePage then
         SetCodePage(TheResult, TargetCodePage, False);
       {$endif}
     end;
     
    -function UTF8ToCP932(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP932(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result);
    +  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP936(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP936(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result);
    +  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP949(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP949(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result);
    +  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP950(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP950(const s: string; SetTargetCodePage: boolean;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result);
    +  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result,ErrorMode);
     end;
     {$ELSE}
    -function UTF8ToCP932(const s: string): string;
    +function UTF8ToCP932(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP932);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP932, ErrorMode);
     end;
     
    -function UTF8ToCP936(const s: string): string;
    +function UTF8ToCP936(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP936);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP936, ErrorMode);
     end;
     
    -function UTF8ToCP949(const s: string): string;
    +function UTF8ToCP949(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP949);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP949, ErrorMode);
     end;
     
    -function UTF8ToCP950(const s: string): string;
    +function UTF8ToCP950(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result := UTF8ToDBCS(s, @UnicodeToCP950);
    +  Result := UTF8ToDBCS(s, @UnicodeToCP950, ErrorMode);
     end;
     {$ENDIF}
     
    -function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
    +function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     var
       len:  integer;
       Src:  PChar;
    @@ -232,6 +238,19 @@
             else
               Dest^ := chr(i);
             Inc(Dest);
    +      end
    +      else
    +      case ErrorMode of
    +        eemReplace:
    +          begin
    +            Dest^ := EncodingReplacementChar;
    +            Inc(Dest);
    +          end;
    +        eemException:
    +          begin
    +            raise EConvertError.Create(
    +              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
    +          end;
           end;
         end;
       until false;
    Index: components/lazutils/lconvencoding.pas
    ===================================================================
    --- components/lazutils/lconvencoding.pas	(revision 57941)
    +++ components/lazutils/lconvencoding.pas	(working copy)
    @@ -75,14 +75,29 @@
       UTF32BEBOM = #0#0#$FE#$FF;
       UTF32LEBOM = #$FE#$FF#0#0;
     
    +type
    +  TEncodingErrorMode = (
    +    eemIgnore,
    +    eemReplace,
    +    eemException
    +    );
    +
    +const
    +  eemDefaultValue = eemIgnore;
    +
    +var
    +  EncodingReplacementChar: char = '?';
    +
     function GuessEncoding(const s: string): string;
     
     function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string;
     // For UTF8 use the above functions, they save you one parameter
     function ConvertEncoding(const s, FromEncoding, ToEncoding: string
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     
     // This routine should obtain the encoding utilized by ansistring in the RTL
     function GetDefaultTextEncoding: string;
    @@ -95,9 +110,12 @@
     type
       TConvertEncodingFunction = function(const s: string): string;
       {$ifdef FPC_HAS_CPSTRING}
    -  TConvertUTF8ToEncodingFunc = function(const s: string; SetTargetCodePage: boolean = false): RawByteString;
    +  TConvertUTF8ToEncodingFunc = function(const s: string;
    +    SetTargetCodePage: boolean = false;
    +    ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
       {$else}
    -  TConvertUTF8ToEncodingFunc = function(const s: string): string;
    +  TConvertUTF8ToEncodingFunc = function(const s: string;
    +    ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
       {$endif}
       TCharToUTF8Table = array[char] of PChar;
       TUnicodeToCharID = function(Unicode: cardinal): integer;
    @@ -131,52 +149,95 @@
     
     function UTF8ToUTF8BOM(const s: string): string; // UTF8 with BOM
     {$ifdef FPC_HAS_CPSTRING}
    -function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
    -function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false): RawByteString; // eastern europe
    -function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Western European languages
    -function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
    -function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false): RawByteString; // cyrillic
    -function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false): RawByteString; // latin 1
    -function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false): RawByteString; // greek
    -function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false): RawByteString; // turkish
    -function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false): RawByteString; // hebrew
    -function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false): RawByteString; // arabic
    -function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false): RawByteString; // baltic
    -function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false): RawByteString; // vietnam
    -function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
    -function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS western europe
    -function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
    -function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS and Windows console's cyrillic
    -function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // thai
    -function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // russian cyrillic
    -function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // ukrainian cyrillic
    -function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // belarussian cyrillic
    -function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // Macintosh, alias Mac OS Roman
    +function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // central europe
    +function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // eastern europe
    +function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Western European languages
    +function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // central europe
    +function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // cyrillic
    +function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // latin 1
    +function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // greek
    +function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // turkish
    +function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // hebrew
    +function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // arabic
    +function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // baltic
    +function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // vietnam
    +function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS central europe
    +function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS western europe
    +function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS central europe
    +function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS and Windows console's cyrillic
    +function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // thai
    +function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // russian cyrillic
    +function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // ukrainian cyrillic
    +function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // belarussian cyrillic
    +function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // Macintosh, alias Mac OS Roman
     {$ELSE}
    -function UTF8ToISO_8859_1(const s: string): string; // central europe
    -function UTF8ToISO_8859_15(const s: string): string; // Western European languages
    -function UTF8ToISO_8859_2(const s: string): string; // eastern europe
    -function UTF8ToCP1250(const s: string): string; // central europe
    -function UTF8ToCP1251(const s: string): string; // cyrillic
    -function UTF8ToCP1252(const s: string): string; // latin 1
    -function UTF8ToCP1253(const s: string): string; // greek
    -function UTF8ToCP1254(const s: string): string; // turkish
    -function UTF8ToCP1255(const s: string): string; // hebrew
    -function UTF8ToCP1256(const s: string): string; // arabic
    -function UTF8ToCP1257(const s: string): string; // baltic
    -function UTF8ToCP1258(const s: string): string; // vietnam
    -function UTF8ToCP437(const s: string): string;  // DOS central europe
    -function UTF8ToCP850(const s: string): string;  // DOS western europe
    -function UTF8ToCP852(const s: string): string;  // DOS central europe
    -function UTF8ToCP866(const s: string): string;  // DOS and Windows console's cyrillic
    -function UTF8ToCP874(const s: string): string;  // thai
    -function UTF8ToKOI8(const s: string): string;  // russian cyrillic
    -function UTF8ToKOI8U(const s: string): string;  // ukrainian cyrillic
    -function UTF8ToKOI8RU(const s: string): string;  // belarussian cyrillic
    -function UTF8ToMacintosh(const s: string): string;  // Macintosh, alias Mac OS Roman
    +function UTF8ToISO_8859_1(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // central europe
    +function UTF8ToISO_8859_15(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // Western European languages
    +function UTF8ToISO_8859_2(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // eastern europe
    +function UTF8ToCP1250(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // central europe
    +function UTF8ToCP1251(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // cyrillic
    +function UTF8ToCP1252(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // latin 1
    +function UTF8ToCP1253(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // greek
    +function UTF8ToCP1254(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // turkish
    +function UTF8ToCP1255(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // hebrew
    +function UTF8ToCP1256(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // arabic
    +function UTF8ToCP1257(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // baltic
    +function UTF8ToCP1258(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // vietnam
    +function UTF8ToCP437(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS central europe
    +function UTF8ToCP850(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS western europe
    +function UTF8ToCP852(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS central europe
    +function UTF8ToCP866(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS and Windows console's cyrillic
    +function UTF8ToCP874(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // thai
    +function UTF8ToKOI8(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // russian cyrillic
    +function UTF8ToKOI8U(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // ukrainian cyrillic
    +function UTF8ToKOI8RU(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // belarusian cyrillic
    +function UTF8ToMacintosh(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // Macintosh, alias Mac OS Roman
     {$ENDIF}
     // custom conversion
    -function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
    +function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     
     function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM
     function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM
    @@ -191,18 +252,27 @@
     function DBCSToUTF8(const s: string; CodeP: integer): string;
     
     {$ifdef FPC_HAS_CPSTRING}
    -function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
    -function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    -function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Korea
    -function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese Complex
    +function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Japanese
    +function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    +function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Korea
    +function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Chinese Complex
     {$ELSE}
    -function UTF8ToCP932(const s: string): string;      // Japanese
    -function UTF8ToCP936(const s: string): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    -function UTF8ToCP949(const s: string): string;      // Korea
    -function UTF8ToCP950(const s: string): string;      // Chinese Complex
    +function UTF8ToCP932(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Japanese
    +function UTF8ToCP936(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
    +function UTF8ToCP949(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Korea
    +function UTF8ToCP950(const s: string;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Chinese Complex
     {$ENDIF}
     // Common function used by all UTF8ToXXX functions.
    -function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
    +function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     {$ENDIF}
     
     procedure GetSupportedEncodings(List: TStrings);
    @@ -6763,7 +6833,8 @@
     procedure InternalUTF8ToCP(const s: string; TargetCodePage: TSystemCodePage;
       SetTargetCodePage: boolean;
       const UTF8CharConvFunc: TUnicodeToCharID;
    -  out TheResult: RawByteString); inline;
    +  out TheResult: RawByteString;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue); inline;
     begin
       if not Assigned(UTF8CharConvFunc) then
       begin
    @@ -6772,234 +6843,235 @@
         if not SetTargetCodePage then
           SetCodePage(TheResult, CP_ACP, False);
       end else begin
    -    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc);
    +    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc,ErrorMode);
         if SetTargetCodePage then
           SetCodePage(TheResult, TargetCodePage, False);
       end;
     end;
     
    -function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result);
    +  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result);
    +  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result);
    +  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result);
    +  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #152 -> using table
    -  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result);
    +  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #128 -> using table
    -  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result);
    +  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result);
    +  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result);
    +  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result);
    +  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result);
    +  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result);
    +  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result,ErrorMode);
     end;
     
    -function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result);
    +  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result,ErrorMode);
     end;
     
    -function UTF8ToCP437(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP437(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result);
    +  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP850(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP850(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result);
    +  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP852(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP852(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result);
    +  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP866(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP866(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result);
    +  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToCP874(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToCP874(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
       // system conversion fails for character #129 -> using table
    -  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result);
    +  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result,ErrorMode);
     end;
     
    -function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result);
    +  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result);
    +  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result);
    +  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result,ErrorMode);
     end;
     
    -function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean): RawByteString;
    +function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
     begin
    -  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result);
    +  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result,ErrorMode);
     end;
     {$ELSE}
    -function UTF8ToISO_8859_1(const s: string): string;
    +function UTF8ToISO_8859_1(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_15(const s: string): string;
    +function UTF8ToISO_8859_15(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15,ErrorMode);
     end;
     
    -function UTF8ToISO_8859_2(const s: string): string;
    +function UTF8ToISO_8859_2(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2,ErrorMode);
     end;
     
    -function UTF8ToCP1250(const s: string): string;
    +function UTF8ToCP1250(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250,ErrorMode);
     end;
     
    -function UTF8ToCP1251(const s: string): string;
    +function UTF8ToCP1251(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251,ErrorMode);
     end;
     
    -function UTF8ToCP1252(const s: string): string;
    +function UTF8ToCP1252(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252,ErrorMode);
     end;
     
    -function UTF8ToCP1253(const s: string): string;
    +function UTF8ToCP1253(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253,ErrorMode);
     end;
     
    -function UTF8ToCP1254(const s: string): string;
    +function UTF8ToCP1254(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254,ErrorMode);
     end;
     
    -function UTF8ToCP1255(const s: string): string;
    +function UTF8ToCP1255(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255,ErrorMode);
     end;
     
    -function UTF8ToCP1256(const s: string): string;
    +function UTF8ToCP1256(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256,ErrorMode);
     end;
     
    -function UTF8ToCP1257(const s: string): string;
    +function UTF8ToCP1257(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257,ErrorMode);
     end;
     
    -function UTF8ToCP1258(const s: string): string;
    +function UTF8ToCP1258(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258,ErrorMode);
     end;
     
    -function UTF8ToCP437(const s: string): string;
    +function UTF8ToCP437(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP437);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP437,ErrorMode);
     end;
     
    -function UTF8ToCP850(const s: string): string;
    +function UTF8ToCP850(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP850);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP850,ErrorMode);
     end;
     
    -function UTF8ToCP852(const s: string): string;
    +function UTF8ToCP852(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP852);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP852,ErrorMode);
     end;
     
    -function UTF8ToCP866(const s: string): string;
    +function UTF8ToCP866(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP866);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP866,ErrorMode);
     end;
     
    -function UTF8ToCP874(const s: string): string;
    +function UTF8ToCP874(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToCP874);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToCP874,ErrorMode);
     end;
     
    -function UTF8ToKOI8(const s: string): string;
    +function UTF8ToKOI8(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8,ErrorMode);
     end;
     
    -function UTF8ToKOI8U(const s: string): string;
    +function UTF8ToKOI8U(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U,ErrorMode);
     end;
     
    -function UTF8ToKOI8RU(const s: string): string;
    +function UTF8ToKOI8RU(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU,ErrorMode);
     end;
     
    -function UTF8ToMacintosh(const s: string): string;
    +function UTF8ToMacintosh(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     begin
    -  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh);
    +  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh,ErrorMode);
     end;
     {$ENDIF}
     
     function UTF8ToSingleByte(const s: string;
    -  const UTF8CharConvFunc: TUnicodeToCharID): string;
    +  const UTF8CharConvFunc: TUnicodeToCharID;
    +  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     var
       len: Integer;
       Src: PChar;
    @@ -7033,6 +7105,19 @@
           if i>=0 then begin
             Dest^:=chr(i);
             inc(Dest);
    +      end
    +      else
    +      case ErrorMode of
    +        eemReplace:
    +          begin
    +            Dest^:=EncodingReplacementChar;
    +            inc(Dest);
    +          end;
    +        eemException:
    +          begin;
    +            raise EConvertError.Create(
    +              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
    +          end;
           end;
         end;
       end;
    @@ -7286,7 +7371,8 @@
     
     
     function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     var
       ATo: string;
     
    @@ -7304,36 +7390,36 @@
       ATo:=NormalizeEncoding(ToEncoding);
     
       if ATo=EncodingUTF8BOM then begin Result:=UTF8ToUTF8BOM(s); exit; end;
    -  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    +  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
       {$IFnDEF DisableAsianCodePages}
    -  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    +  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
       {$ENDIF}
    -  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    -  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
    +  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
    +  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
       if ATo=EncodingUCS2LE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2LE(s); exit; end;
       if ATo=EncodingUCS2BE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2BE(s); exit; end;
     
       if (ATo=GetDefaultTextEncoding) and Assigned(ConvertUTF8ToAnsi) then begin
    -    Result:=ConvertUTF8ToAnsi(s);
    +    Result:=ConvertUTF8ToAnsi(s{$ifdef FPC_HAS_CPSTRING},false{$endif},ErrorMode);
         exit;
       end;
     
    @@ -7386,7 +7472,8 @@
     end;
     
     function ConvertEncoding(const s, FromEncoding, ToEncoding: string
    -  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}): string;
    +  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}
    +  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
     var
       AFrom, ATo, SysEnc : String;
       Encoded : Boolean;
    @@ -7414,7 +7501,7 @@
       //DebugLn(['ConvertEncoding ',AFrom,' ',ATo]);
     
       if AFrom=EncodingUTF8 then begin
    -    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
    +    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
         if Encoded then exit;
       end
       else
    @@ -7426,7 +7513,7 @@
       begin
         Result:=ConvertEncodingToUTF8(s, AFrom, Encoded);
         if Encoded then
    -      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
    +      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
         if Encoded then exit;
       end;
     
    
    c2.diff (44,083 bytes)
  • conv-simple.diff (763 bytes)
    Index: components/lazutils/lconvencoding.pas
    ===================================================================
    --- components/lazutils/lconvencoding.pas	(revision 59166)
    +++ components/lazutils/lconvencoding.pas	(working copy)
    @@ -31,6 +31,9 @@
       SysUtils, Classes, dos, LazUTF8
       {$IFDEF EnableIconvEnc},iconvenc{$ENDIF};
     
    +var
    +  ConvertEncodingFromUtf8RaisesException: boolean = False;
    +
     //encoding names
     const
       EncodingUTF8 = 'utf8';
    @@ -7033,7 +7036,10 @@
           if i>=0 then begin
             Dest^:=chr(i);
             inc(Dest);
    -      end;
    +      end
    +      else
    +      if ConvertEncodingFromUtf8RaisesException then
    +        raise EConvertError.Create('Cannot convert UTF8 to single byte');
         end;
       end;
       SetLength(Result,Dest-PChar(Result));
    
    conv-simple.diff (763 bytes)
  • simple2.diff (1,341 bytes)
    Index: components/lazutils/asiancodepagefunctions.inc
    ===================================================================
    --- components/lazutils/asiancodepagefunctions.inc	(revision 59403)
    +++ components/lazutils/asiancodepagefunctions.inc	(working copy)
    @@ -232,7 +232,10 @@
             else
               Dest^ := chr(i);
             Inc(Dest);
    -      end;
    +      end
    +      else
    +      if ConvertEncodingFromUtf8RaisesException then
    +        raise EConvertError.Create('Cannot convert UTF8 to DBCS code page');
         end;
       until false;
       //SetLength(Result, Dest - PChar(Result));
    Index: components/lazutils/lconvencoding.pas
    ===================================================================
    --- components/lazutils/lconvencoding.pas	(revision 59403)
    +++ components/lazutils/lconvencoding.pas	(working copy)
    @@ -31,6 +31,9 @@
       SysUtils, Classes, dos, LazUTF8
       {$IFDEF EnableIconvEnc},iconvenc{$ENDIF};
     
    +var
    +  ConvertEncodingFromUtf8RaisesException: boolean = False;
    +
     //encoding names
     const
       EncodingUTF8 = 'utf8';
    @@ -7033,7 +7036,10 @@
           if i>=0 then begin
             Dest^:=chr(i);
             inc(Dest);
    -      end;
    +      end
    +      else
    +      if ConvertEncodingFromUtf8RaisesException then
    +        raise EConvertError.Create('Cannot convert UTF8 to single byte');
         end;
       end;
       SetLength(Result,Dest-PChar(Result));
    
    simple2.diff (1,341 bytes)

Activities

Alexey Tor.

2018-05-03 23:16

reporter  

c1.diff (43,370 bytes)
Index: components/lazutils/asiancodepagefunctions.inc
===================================================================
--- components/lazutils/asiancodepagefunctions.inc	(revision 57782)
+++ components/lazutils/asiancodepagefunctions.inc	(working copy)
@@ -132,7 +132,8 @@
 procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
   SetTargetCodePage: boolean;
   {$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}
-  out TheResult: RawByteString); inline;
+  out TheResult: RawByteString;
+  ErrorMode: TEncodingErrorMode=eemIgnore); inline;
 begin
   {$ifdef UseSystemCPConv}
   TheResult:=s;
@@ -140,54 +141,59 @@
   if not SetTargetCodePage then
     SetCodePage(TheResult, CP_ACP, False);
   {$else}
-  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc);
+  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc,ErrorMode);
   if SetTargetCodePage then
     SetCodePage(TheResult, TargetCodePage, False);
   {$endif}
 end;
 
-function UTF8ToCP932(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP932(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result);
+  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP936(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP936(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result);
+  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP949(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP949(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result);
+  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP950(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP950(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result);
+  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result,ErrorMode);
 end;
 {$ELSE}
-function UTF8ToCP932(const s: string): string;
+function UTF8ToCP932(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP932);
+  Result := UTF8ToDBCS(s, @UnicodeToCP932, ErrorMode);
 end;
 
-function UTF8ToCP936(const s: string): string;
+function UTF8ToCP936(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP936);
+  Result := UTF8ToDBCS(s, @UnicodeToCP936, ErrorMode);
 end;
 
-function UTF8ToCP949(const s: string): string;
+function UTF8ToCP949(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP949);
+  Result := UTF8ToDBCS(s, @UnicodeToCP949, ErrorMode);
 end;
 
-function UTF8ToCP950(const s: string): string;
+function UTF8ToCP950(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP950);
+  Result := UTF8ToDBCS(s, @UnicodeToCP950, ErrorMode);
 end;
 {$ENDIF}
 
-function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
+function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;
 var
   len:  integer;
   Src:  PChar;
@@ -232,6 +238,19 @@
         else
           Dest^ := chr(i);
         Inc(Dest);
+      end
+      else
+      case ErrorMode of
+        eemReplace:
+          begin
+            Dest^ := EncodingReplacementChar;
+            Inc(Dest);
+          end;
+        eemException:
+          begin
+            raise EConvertError.Create(
+              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
+          end;
       end;
     end;
   until false;
Index: components/lazutils/lconvencoding.pas
===================================================================
--- components/lazutils/lconvencoding.pas	(revision 57782)
+++ components/lazutils/lconvencoding.pas	(working copy)
@@ -75,14 +75,26 @@
   UTF32BEBOM = #0#0#$FE#$FF;
   UTF32LEBOM = #$FE#$FF#0#0;
 
+type
+  TEncodingErrorMode = (
+    eemIgnore,
+    eemReplace,
+    eemException
+    );
+
+var
+  EncodingReplacementChar: char = '?';
+
 function GuessEncoding(const s: string): string;
 
 function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string;
 // For UTF8 use the above functions, they save you one parameter
 function ConvertEncoding(const s, FromEncoding, ToEncoding: string
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 
 // This routine should obtain the encoding utilized by ansistring in the RTL
 function GetDefaultTextEncoding: string;
@@ -95,9 +107,12 @@
 type
   TConvertEncodingFunction = function(const s: string): string;
   {$ifdef FPC_HAS_CPSTRING}
-  TConvertUTF8ToEncodingFunc = function(const s: string; SetTargetCodePage: boolean = false): RawByteString;
+  TConvertUTF8ToEncodingFunc = function(const s: string;
+    SetTargetCodePage: boolean = false;
+    ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
   {$else}
-  TConvertUTF8ToEncodingFunc = function(const s: string): string;
+  TConvertUTF8ToEncodingFunc = function(const s: string;
+    ErrorMode: TEncodingErrorMode=eemIgnore): string;
   {$endif}
   TCharToUTF8Table = array[char] of PChar;
   TUnicodeToCharID = function(Unicode: cardinal): integer;
@@ -131,52 +146,95 @@
 
 function UTF8ToUTF8BOM(const s: string): string; // UTF8 with BOM
 {$ifdef FPC_HAS_CPSTRING}
-function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
-function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false): RawByteString; // eastern europe
-function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Western European languages
-function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
-function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false): RawByteString; // cyrillic
-function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false): RawByteString; // latin 1
-function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false): RawByteString; // greek
-function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false): RawByteString; // turkish
-function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false): RawByteString; // hebrew
-function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false): RawByteString; // arabic
-function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false): RawByteString; // baltic
-function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false): RawByteString; // vietnam
-function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
-function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS western europe
-function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
-function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS and Windows console's cyrillic
-function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // thai
-function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // russian cyrillic
-function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // ukrainian cyrillic
-function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // belarussian cyrillic
-function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // Macintosh, alias Mac OS Roman
+function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // central europe
+function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // eastern europe
+function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Western European languages
+function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // central europe
+function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // cyrillic
+function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // latin 1
+function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // greek
+function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // turkish
+function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // hebrew
+function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // arabic
+function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // baltic
+function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // vietnam
+function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS central europe
+function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS western europe
+function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS central europe
+function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // DOS and Windows console's cyrillic
+function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // thai
+function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // russian cyrillic
+function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // ukrainian cyrillic
+function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // belarussian cyrillic
+function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;  // Macintosh, alias Mac OS Roman
 {$ELSE}
-function UTF8ToISO_8859_1(const s: string): string; // central europe
-function UTF8ToISO_8859_15(const s: string): string; // Western European languages
-function UTF8ToISO_8859_2(const s: string): string; // eastern europe
-function UTF8ToCP1250(const s: string): string; // central europe
-function UTF8ToCP1251(const s: string): string; // cyrillic
-function UTF8ToCP1252(const s: string): string; // latin 1
-function UTF8ToCP1253(const s: string): string; // greek
-function UTF8ToCP1254(const s: string): string; // turkish
-function UTF8ToCP1255(const s: string): string; // hebrew
-function UTF8ToCP1256(const s: string): string; // arabic
-function UTF8ToCP1257(const s: string): string; // baltic
-function UTF8ToCP1258(const s: string): string; // vietnam
-function UTF8ToCP437(const s: string): string;  // DOS central europe
-function UTF8ToCP850(const s: string): string;  // DOS western europe
-function UTF8ToCP852(const s: string): string;  // DOS central europe
-function UTF8ToCP866(const s: string): string;  // DOS and Windows console's cyrillic
-function UTF8ToCP874(const s: string): string;  // thai
-function UTF8ToKOI8(const s: string): string;  // russian cyrillic
-function UTF8ToKOI8U(const s: string): string;  // ukrainian cyrillic
-function UTF8ToKOI8RU(const s: string): string;  // belarussian cyrillic
-function UTF8ToMacintosh(const s: string): string;  // Macintosh, alias Mac OS Roman
+function UTF8ToISO_8859_1(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // central europe
+function UTF8ToISO_8859_15(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // Western European languages
+function UTF8ToISO_8859_2(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // eastern europe
+function UTF8ToCP1250(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // central europe
+function UTF8ToCP1251(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // cyrillic
+function UTF8ToCP1252(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // latin 1
+function UTF8ToCP1253(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // greek
+function UTF8ToCP1254(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // turkish
+function UTF8ToCP1255(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // hebrew
+function UTF8ToCP1256(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // arabic
+function UTF8ToCP1257(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // baltic
+function UTF8ToCP1258(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string; // vietnam
+function UTF8ToCP437(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS central europe
+function UTF8ToCP850(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS western europe
+function UTF8ToCP852(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS central europe
+function UTF8ToCP866(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // DOS and Windows console's cyrillic
+function UTF8ToCP874(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // thai
+function UTF8ToKOI8(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // russian cyrillic
+function UTF8ToKOI8U(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // ukrainian cyrillic
+function UTF8ToKOI8RU(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // belarussian cyrillic
+function UTF8ToMacintosh(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;  // Macintosh, alias Mac OS Roman
 {$ENDIF}
 // custom conversion
-function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
+function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;
 
 function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM
 function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM
@@ -191,18 +249,27 @@
 function DBCSToUTF8(const s: string; CodeP: integer): string;
 
 {$ifdef FPC_HAS_CPSTRING}
-function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
-function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
-function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Korea
-function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese Complex
+function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Japanese
+function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
+function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Korea
+function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString; // Chinese Complex
 {$ELSE}
-function UTF8ToCP932(const s: string): string;      // Japanese
-function UTF8ToCP936(const s: string): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
-function UTF8ToCP949(const s: string): string;      // Korea
-function UTF8ToCP950(const s: string): string;      // Chinese Complex
+function UTF8ToCP932(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Japanese
+function UTF8ToCP936(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
+function UTF8ToCP949(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Korea
+function UTF8ToCP950(const s: string;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;      // Chinese Complex
 {$ENDIF}
 // Common function used by all UTF8ToXXX functions.
-function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
+function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;
 {$ENDIF}
 
 procedure GetSupportedEncodings(List: TStrings);
@@ -6763,7 +6830,8 @@
 procedure InternalUTF8ToCP(const s: string; TargetCodePage: TSystemCodePage;
   SetTargetCodePage: boolean;
   const UTF8CharConvFunc: TUnicodeToCharID;
-  out TheResult: RawByteString); inline;
+  out TheResult: RawByteString;
+  ErrorMode: TEncodingErrorMode=eemIgnore); inline;
 begin
   if not Assigned(UTF8CharConvFunc) then
   begin
@@ -6772,234 +6840,235 @@
     if not SetTargetCodePage then
       SetCodePage(TheResult, CP_ACP, False);
   end else begin
-    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc);
+    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc,ErrorMode);
     if SetTargetCodePage then
       SetCodePage(TheResult, TargetCodePage, False);
   end;
 end;
 
-function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result);
+  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result);
+  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result);
+  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result);
+  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result,ErrorMode);
 end;
 
-function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #152 -> using table
-  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result);
+  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result,ErrorMode);
 end;
 
-function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #128 -> using table
-  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result);
+  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result,ErrorMode);
 end;
 
-function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result);
+  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result,ErrorMode);
 end;
 
-function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result);
+  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result,ErrorMode);
 end;
 
-function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result);
+  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result,ErrorMode);
 end;
 
-function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result);
+  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result);
+  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result,ErrorMode);
 end;
 
-function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result);
+  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result,ErrorMode);
 end;
 
-function UTF8ToCP437(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP437(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result);
+  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP850(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP850(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result);
+  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP852(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP852(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result);
+  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP866(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP866(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result);
+  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP874(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP874(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result);
+  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result,ErrorMode);
 end;
 
-function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result);
+  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result);
+  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result);
+  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemIgnore): RawByteString;
 begin
-  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result);
+  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result,ErrorMode);
 end;
 {$ELSE}
-function UTF8ToISO_8859_1(const s: string): string;
+function UTF8ToISO_8859_1(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1);
+  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1,ErrorMode);
 end;
 
-function UTF8ToISO_8859_15(const s: string): string;
+function UTF8ToISO_8859_15(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15);
+  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15,ErrorMode);
 end;
 
-function UTF8ToISO_8859_2(const s: string): string;
+function UTF8ToISO_8859_2(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2);
+  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2,ErrorMode);
 end;
 
-function UTF8ToCP1250(const s: string): string;
+function UTF8ToCP1250(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250,ErrorMode);
 end;
 
-function UTF8ToCP1251(const s: string): string;
+function UTF8ToCP1251(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251,ErrorMode);
 end;
 
-function UTF8ToCP1252(const s: string): string;
+function UTF8ToCP1252(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252,ErrorMode);
 end;
 
-function UTF8ToCP1253(const s: string): string;
+function UTF8ToCP1253(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253,ErrorMode);
 end;
 
-function UTF8ToCP1254(const s: string): string;
+function UTF8ToCP1254(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254,ErrorMode);
 end;
 
-function UTF8ToCP1255(const s: string): string;
+function UTF8ToCP1255(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255,ErrorMode);
 end;
 
-function UTF8ToCP1256(const s: string): string;
+function UTF8ToCP1256(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256,ErrorMode);
 end;
 
-function UTF8ToCP1257(const s: string): string;
+function UTF8ToCP1257(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257,ErrorMode);
 end;
 
-function UTF8ToCP1258(const s: string): string;
+function UTF8ToCP1258(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258,ErrorMode);
 end;
 
-function UTF8ToCP437(const s: string): string;
+function UTF8ToCP437(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP437);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP437,ErrorMode);
 end;
 
-function UTF8ToCP850(const s: string): string;
+function UTF8ToCP850(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP850);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP850,ErrorMode);
 end;
 
-function UTF8ToCP852(const s: string): string;
+function UTF8ToCP852(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP852);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP852,ErrorMode);
 end;
 
-function UTF8ToCP866(const s: string): string;
+function UTF8ToCP866(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP866);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP866,ErrorMode);
 end;
 
-function UTF8ToCP874(const s: string): string;
+function UTF8ToCP874(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP874);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP874,ErrorMode);
 end;
 
-function UTF8ToKOI8(const s: string): string;
+function UTF8ToKOI8(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8);
+  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8,ErrorMode);
 end;
 
-function UTF8ToKOI8U(const s: string): string;
+function UTF8ToKOI8U(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U);
+  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U,ErrorMode);
 end;
 
-function UTF8ToKOI8RU(const s: string): string;
+function UTF8ToKOI8RU(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU);
+  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU,ErrorMode);
 end;
 
-function UTF8ToMacintosh(const s: string): string;
+function UTF8ToMacintosh(const s: string; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh);
+  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh,ErrorMode);
 end;
 {$ENDIF}
 
 function UTF8ToSingleByte(const s: string;
-  const UTF8CharConvFunc: TUnicodeToCharID): string;
+  const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemIgnore): string;
 var
   len: Integer;
   Src: PChar;
@@ -7033,6 +7102,19 @@
       if i>=0 then begin
         Dest^:=chr(i);
         inc(Dest);
+      end
+      else
+      case ErrorMode of
+        eemReplace:
+          begin
+            Dest^:=EncodingReplacementChar;
+            inc(Dest);
+          end;
+        eemException:
+          begin;
+            raise EConvertError.Create(
+              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
+          end;
       end;
     end;
   end;
@@ -7286,7 +7368,8 @@
 
 
 function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 var
   ATo: string;
 
@@ -7304,36 +7387,36 @@
   ATo:=NormalizeEncoding(ToEncoding);
 
   if ATo=EncodingUTF8BOM then begin Result:=UTF8ToUTF8BOM(s); exit; end;
-  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
+  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
   {$IFnDEF DisableAsianCodePages}
-  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
+  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
   {$ENDIF}
-  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
+  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
   if ATo=EncodingUCS2LE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2LE(s); exit; end;
   if ATo=EncodingUCS2BE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2BE(s); exit; end;
 
   if (ATo=GetDefaultTextEncoding) and Assigned(ConvertUTF8ToAnsi) then begin
-    Result:=ConvertUTF8ToAnsi(s);
+    Result:=ConvertUTF8ToAnsi(s{$ifdef FPC_HAS_CPSTRING},false{$endif},ErrorMode);
     exit;
   end;
 
@@ -7386,7 +7469,8 @@
 end;
 
 function ConvertEncoding(const s, FromEncoding, ToEncoding: string
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemIgnore): string;
 var
   AFrom, ATo, SysEnc : String;
   Encoded : Boolean;
@@ -7414,7 +7498,7 @@
   //DebugLn(['ConvertEncoding ',AFrom,' ',ATo]);
 
   if AFrom=EncodingUTF8 then begin
-    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
+    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
     if Encoded then exit;
   end
   else
@@ -7426,7 +7510,7 @@
   begin
     Result:=ConvertEncodingToUTF8(s, AFrom, Encoded);
     if Encoded then
-      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
+      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
     if Encoded then exit;
   end;
 
c1.diff (43,370 bytes)

Juha Manninen

2018-05-15 00:10

developer   ~0108300

Is it necessary to add an extra parameter to so many functions?
Maybe a global variable for ErrorMode would do?
I don't know. I have not needed the feature myself so far.

Alexey Tor.

2018-05-15 00:46

reporter   ~0108301

Last edited: 2018-05-15 00:47

View 2 revisions

Global var: I'm not sure it is a good idea.
For example: if I set this global var to "exception" for CudaText, some other part of Lazarus will then raise an exception too - without try/except...

With local param: no problem

jamie philbrook

2018-05-15 01:05

reporter   ~0108302

Last edited: 2018-05-15 01:06

View 2 revisions

Please don't lard up the function calling, it's bad enough now...

If you want generalized error reporting, then implement a status
variable in the style the IO system uses..

 UTF8Error: Integer;
  
  Each time a UTF8 function gets used it can initially clear this variable,
 do the work and then exit;

 Your CudaText can test the variable's result if it needs to.

 The rest of us can live with the way it is now and not lard up the calling
stack, thereby slowing things down and making apps larger..

Thank you.

Alexey Tor.

2018-05-15 08:36

reporter   ~0108304

jamie,
If I don't add a parameter and use a global var instead, see my reply above about the global var. When should this var be cleared? Unknown.
If the convert function clears this var, then the var is useless, because it's reset on each call.

Alexey Tor.

2018-05-15 08:37

reporter   ~0108305

jamie,
2) If CudaText reads this var and other apps do not, that is bad: apps cannot do the "replace" action and cannot do the "exception" action as they can with the current patch.

Ondrej Pokorny

2018-05-15 13:30

reporter   ~0108311

> Is it necessary to add an extra parameter to so many functions?

Well you can make overloads to so many functions as well. It depends.

> Maybe a global variable for ErrorMode would do?

Definitely not. Folks, don't forget about multi-threaded applications!!! Such functions have to be thread safe and you must be able to set the parameters thread-independently.

The only thing I can accept as a global variable is the error replacement string "?". But this could be solved with an overload as well.

+++

Btw. the behavior should default to "raise exception" - like StrToInt etc.

Alexey Tor.

2018-05-15 13:50

reporter   ~0108313

Ondrej,
Defaulting to "exception": won't this break some apps which don't expect it?

Alexey Tor.

2018-05-15 14:24

reporter  

c2.diff (44,083 bytes)
Index: components/lazutils/asiancodepagefunctions.inc
===================================================================
--- components/lazutils/asiancodepagefunctions.inc	(revision 57941)
+++ components/lazutils/asiancodepagefunctions.inc	(working copy)
@@ -132,7 +132,8 @@
 procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
   SetTargetCodePage: boolean;
   {$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}
-  out TheResult: RawByteString); inline;
+  out TheResult: RawByteString;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue); inline;
 begin
   {$ifdef UseSystemCPConv}
   TheResult:=s;
@@ -140,54 +141,59 @@
   if not SetTargetCodePage then
     SetCodePage(TheResult, CP_ACP, False);
   {$else}
-  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc);
+  TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc,ErrorMode);
   if SetTargetCodePage then
     SetCodePage(TheResult, TargetCodePage, False);
   {$endif}
 end;
 
-function UTF8ToCP932(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP932(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result);
+  InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP936(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP936(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result);
+  InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP949(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP949(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result);
+  InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP950(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP950(const s: string; SetTargetCodePage: boolean;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result);
+  InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result,ErrorMode);
 end;
 {$ELSE}
-function UTF8ToCP932(const s: string): string;
+function UTF8ToCP932(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP932);
+  Result := UTF8ToDBCS(s, @UnicodeToCP932, ErrorMode);
 end;
 
-function UTF8ToCP936(const s: string): string;
+function UTF8ToCP936(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP936);
+  Result := UTF8ToDBCS(s, @UnicodeToCP936, ErrorMode);
 end;
 
-function UTF8ToCP949(const s: string): string;
+function UTF8ToCP949(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP949);
+  Result := UTF8ToDBCS(s, @UnicodeToCP949, ErrorMode);
 end;
 
-function UTF8ToCP950(const s: string): string;
+function UTF8ToCP950(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result := UTF8ToDBCS(s, @UnicodeToCP950);
+  Result := UTF8ToDBCS(s, @UnicodeToCP950, ErrorMode);
 end;
 {$ENDIF}
 
-function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
+function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 var
   len:  integer;
   Src:  PChar;
@@ -232,6 +238,19 @@
         else
           Dest^ := chr(i);
         Inc(Dest);
+      end
+      else
+      case ErrorMode of
+        eemReplace:
+          begin
+            Dest^ := EncodingReplacementChar;
+            Inc(Dest);
+          end;
+        eemException:
+          begin
+            raise EConvertError.Create(
+              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
+          end;
       end;
     end;
   until false;
Index: components/lazutils/lconvencoding.pas
===================================================================
--- components/lazutils/lconvencoding.pas	(revision 57941)
+++ components/lazutils/lconvencoding.pas	(working copy)
@@ -75,14 +75,29 @@
   UTF32BEBOM = #0#0#$FE#$FF;
   UTF32LEBOM = #$FE#$FF#0#0;
 
+type
+  TEncodingErrorMode = (
+    eemIgnore,
+    eemReplace,
+    eemException
+    );
+
+const
+  eemDefaultValue = eemIgnore;
+
+var
+  EncodingReplacementChar: char = '?';
+
 function GuessEncoding(const s: string): string;
 
 function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string;
 // For UTF8 use the above functions, they save you one parameter
 function ConvertEncoding(const s, FromEncoding, ToEncoding: string
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 
 // This routine should obtain the encoding utilized by ansistring in the RTL
 function GetDefaultTextEncoding: string;
@@ -95,9 +110,12 @@
 type
   TConvertEncodingFunction = function(const s: string): string;
   {$ifdef FPC_HAS_CPSTRING}
-  TConvertUTF8ToEncodingFunc = function(const s: string; SetTargetCodePage: boolean = false): RawByteString;
+  TConvertUTF8ToEncodingFunc = function(const s: string;
+    SetTargetCodePage: boolean = false;
+    ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
   {$else}
-  TConvertUTF8ToEncodingFunc = function(const s: string): string;
+  TConvertUTF8ToEncodingFunc = function(const s: string;
+    ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
   {$endif}
   TCharToUTF8Table = array[char] of PChar;
   TUnicodeToCharID = function(Unicode: cardinal): integer;
@@ -131,52 +149,95 @@
 
 function UTF8ToUTF8BOM(const s: string): string; // UTF8 with BOM
 {$ifdef FPC_HAS_CPSTRING}
-function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
-function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false): RawByteString; // eastern europe
-function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Western European languages
-function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
-function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false): RawByteString; // cyrillic
-function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false): RawByteString; // latin 1
-function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false): RawByteString; // greek
-function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false): RawByteString; // turkish
-function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false): RawByteString; // hebrew
-function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false): RawByteString; // arabic
-function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false): RawByteString; // baltic
-function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false): RawByteString; // vietnam
-function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
-function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS western europe
-function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS central europe
-function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // DOS and Windows console's cyrillic
-function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // thai
-function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // russian cyrillic
-function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // ukrainian cyrillic
-function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // belarussian cyrillic
-function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false): RawByteString;  // Macintosh, alias Mac OS Roman
+function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // central europe
+function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // eastern europe
+function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Western European languages
+function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // central europe
+function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // cyrillic
+function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // latin 1
+function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // greek
+function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // turkish
+function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // hebrew
+function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // arabic
+function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // baltic
+function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // vietnam
+function UTF8ToCP437(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS central europe
+function UTF8ToCP850(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS western europe
+function UTF8ToCP852(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS central europe
+function UTF8ToCP866(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // DOS and Windows console's cyrillic
+function UTF8ToCP874(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // thai
+function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // russian cyrillic
+function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // ukrainian cyrillic
+function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // belarussian cyrillic
+function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;  // Macintosh, alias Mac OS Roman
 {$ELSE}
-function UTF8ToISO_8859_1(const s: string): string; // central europe
-function UTF8ToISO_8859_15(const s: string): string; // Western European languages
-function UTF8ToISO_8859_2(const s: string): string; // eastern europe
-function UTF8ToCP1250(const s: string): string; // central europe
-function UTF8ToCP1251(const s: string): string; // cyrillic
-function UTF8ToCP1252(const s: string): string; // latin 1
-function UTF8ToCP1253(const s: string): string; // greek
-function UTF8ToCP1254(const s: string): string; // turkish
-function UTF8ToCP1255(const s: string): string; // hebrew
-function UTF8ToCP1256(const s: string): string; // arabic
-function UTF8ToCP1257(const s: string): string; // baltic
-function UTF8ToCP1258(const s: string): string; // vietnam
-function UTF8ToCP437(const s: string): string;  // DOS central europe
-function UTF8ToCP850(const s: string): string;  // DOS western europe
-function UTF8ToCP852(const s: string): string;  // DOS central europe
-function UTF8ToCP866(const s: string): string;  // DOS and Windows console's cyrillic
-function UTF8ToCP874(const s: string): string;  // thai
-function UTF8ToKOI8(const s: string): string;  // russian cyrillic
-function UTF8ToKOI8U(const s: string): string;  // ukrainian cyrillic
-function UTF8ToKOI8RU(const s: string): string;  // belarussian cyrillic
-function UTF8ToMacintosh(const s: string): string;  // Macintosh, alias Mac OS Roman
+function UTF8ToISO_8859_1(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // central europe
+function UTF8ToISO_8859_15(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // Western European languages
+function UTF8ToISO_8859_2(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // eastern europe
+function UTF8ToCP1250(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // central europe
+function UTF8ToCP1251(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // cyrillic
+function UTF8ToCP1252(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // latin 1
+function UTF8ToCP1253(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // greek
+function UTF8ToCP1254(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // turkish
+function UTF8ToCP1255(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // hebrew
+function UTF8ToCP1256(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // arabic
+function UTF8ToCP1257(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // baltic
+function UTF8ToCP1258(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string; // vietnam
+function UTF8ToCP437(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS central europe
+function UTF8ToCP850(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS western europe
+function UTF8ToCP852(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS central europe
+function UTF8ToCP866(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // DOS and Windows console's cyrillic
+function UTF8ToCP874(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // thai
+function UTF8ToKOI8(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // russian cyrillic
+function UTF8ToKOI8U(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // ukrainian cyrillic
+function UTF8ToKOI8RU(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // belarussian cyrillic
+function UTF8ToMacintosh(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;  // Macintosh, alias Mac OS Roman
 {$ENDIF}
 // custom conversion
-function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
+function UTF8ToSingleByte(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 
 function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian without BOM
 function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian without BOM
@@ -191,18 +252,27 @@
 function DBCSToUTF8(const s: string; CodeP: integer): string;
 
 {$ifdef FPC_HAS_CPSTRING}
-function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
-function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
-function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Korea
-function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese Complex
+function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Japanese
+function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
+function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Korea
+function UTF8ToCP950(const s: string; SetTargetCodePage: boolean = false;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString; // Chinese Complex
 {$ELSE}
-function UTF8ToCP932(const s: string): string;      // Japanese
-function UTF8ToCP936(const s: string): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
-function UTF8ToCP949(const s: string): string;      // Korea
-function UTF8ToCP950(const s: string): string;      // Chinese Complex
+function UTF8ToCP932(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Japanese
+function UTF8ToCP936(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
+function UTF8ToCP949(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Korea
+function UTF8ToCP950(const s: string;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;      // Chinese Complex
 {$ENDIF}
 // Common function used by all UTF8ToXXX functions.
-function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
+function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 {$ENDIF}
 
 procedure GetSupportedEncodings(List: TStrings);
@@ -6763,7 +6833,8 @@
 procedure InternalUTF8ToCP(const s: string; TargetCodePage: TSystemCodePage;
   SetTargetCodePage: boolean;
   const UTF8CharConvFunc: TUnicodeToCharID;
-  out TheResult: RawByteString); inline;
+  out TheResult: RawByteString;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue); inline;
 begin
   if not Assigned(UTF8CharConvFunc) then
   begin
@@ -6772,234 +6843,235 @@
     if not SetTargetCodePage then
       SetCodePage(TheResult, CP_ACP, False);
   end else begin
-    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc);
+    TheResult:=UTF8ToSingleByte(s,UTF8CharConvFunc,ErrorMode);
     if SetTargetCodePage then
       SetCodePage(TheResult, TargetCodePage, False);
   end;
 end;
 
-function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result);
+  InternalUTF8ToCP(s,28591,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_1{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result);
+  InternalUTF8ToCP(s,28592,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_2{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result);
+  InternalUTF8ToCP(s,28605,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToISO_8859_15{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1250(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result);
+  InternalUTF8ToCP(s,1250,SetTargetCodePage,@UnicodeToCP1250,Result,ErrorMode);
 end;
 
-function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1251(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #152 -> using table
-  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result);
+  InternalUTF8ToCP(s,1251,SetTargetCodePage,@UnicodeToCP1251,Result,ErrorMode);
 end;
 
-function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1252(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #128 -> using table
-  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result);
+  InternalUTF8ToCP(s,1252,SetTargetCodePage,@UnicodeToCP1252,Result,ErrorMode);
 end;
 
-function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1253(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result);
+  InternalUTF8ToCP(s,1253,SetTargetCodePage,@UnicodeToCP1253,Result,ErrorMode);
 end;
 
-function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1254(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result);
+  InternalUTF8ToCP(s,1254,SetTargetCodePage,@UnicodeToCP1254,Result,ErrorMode);
 end;
 
-function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1255(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result);
+  InternalUTF8ToCP(s,1255,SetTargetCodePage,@UnicodeToCP1255,Result,ErrorMode);
 end;
 
-function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1256(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result);
+  InternalUTF8ToCP(s,1256,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP1256{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1257(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result);
+  InternalUTF8ToCP(s,1257,SetTargetCodePage,@UnicodeToCP1257,Result,ErrorMode);
 end;
 
-function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP1258(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result);
+  InternalUTF8ToCP(s,1258,SetTargetCodePage,@UnicodeToCP1258,Result,ErrorMode);
 end;
 
-function UTF8ToCP437(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP437(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result);
+  InternalUTF8ToCP(s,437,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP437{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP850(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP850(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result);
+  InternalUTF8ToCP(s,850,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP850{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP852(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP852(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result);
+  InternalUTF8ToCP(s,852,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP852{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP866(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP866(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result);
+  InternalUTF8ToCP(s,866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToCP866{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToCP874(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToCP874(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
   // system conversion fails for character #129 -> using table
-  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result);
+  InternalUTF8ToCP(s,874,SetTargetCodePage,@UnicodeToCP874,Result,ErrorMode);
 end;
 
-function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToKOI8(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result);
+  InternalUTF8ToCP(s,20866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToKOI8U(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result);
+  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8U{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToKOI8RU(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result);
+  InternalUTF8ToCP(s,21866,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToKOI8RU{$endif},Result,ErrorMode);
 end;
 
-function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean): RawByteString;
+function UTF8ToMacintosh(const s: string; SetTargetCodePage: boolean; ErrorMode: TEncodingErrorMode=eemDefaultValue): RawByteString;
 begin
-  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result);
+  InternalUTF8ToCP(s,10000,SetTargetCodePage,{$IfDef UseSystemCPConv}nil{$else}@UnicodeToMacintosh{$endif},Result,ErrorMode);
 end;
 {$ELSE}
-function UTF8ToISO_8859_1(const s: string): string;
+function UTF8ToISO_8859_1(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1);
+  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_1,ErrorMode);
 end;
 
-function UTF8ToISO_8859_15(const s: string): string;
+function UTF8ToISO_8859_15(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15);
+  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_15,ErrorMode);
 end;
 
-function UTF8ToISO_8859_2(const s: string): string;
+function UTF8ToISO_8859_2(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2);
+  Result:=UTF8ToSingleByte(s,@UnicodeToISO_8859_2,ErrorMode);
 end;
 
-function UTF8ToCP1250(const s: string): string;
+function UTF8ToCP1250(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1250,ErrorMode);
 end;
 
-function UTF8ToCP1251(const s: string): string;
+function UTF8ToCP1251(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1251,ErrorMode);
 end;
 
-function UTF8ToCP1252(const s: string): string;
+function UTF8ToCP1252(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1252,ErrorMode);
 end;
 
-function UTF8ToCP1253(const s: string): string;
+function UTF8ToCP1253(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1253,ErrorMode);
 end;
 
-function UTF8ToCP1254(const s: string): string;
+function UTF8ToCP1254(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1254,ErrorMode);
 end;
 
-function UTF8ToCP1255(const s: string): string;
+function UTF8ToCP1255(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1255,ErrorMode);
 end;
 
-function UTF8ToCP1256(const s: string): string;
+function UTF8ToCP1256(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1256,ErrorMode);
 end;
 
-function UTF8ToCP1257(const s: string): string;
+function UTF8ToCP1257(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1257,ErrorMode);
 end;
 
-function UTF8ToCP1258(const s: string): string;
+function UTF8ToCP1258(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP1258,ErrorMode);
 end;
 
-function UTF8ToCP437(const s: string): string;
+function UTF8ToCP437(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP437);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP437,ErrorMode);
 end;
 
-function UTF8ToCP850(const s: string): string;
+function UTF8ToCP850(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP850);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP850,ErrorMode);
 end;
 
-function UTF8ToCP852(const s: string): string;
+function UTF8ToCP852(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP852);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP852,ErrorMode);
 end;
 
-function UTF8ToCP866(const s: string): string;
+function UTF8ToCP866(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP866);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP866,ErrorMode);
 end;
 
-function UTF8ToCP874(const s: string): string;
+function UTF8ToCP874(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToCP874);
+  Result:=UTF8ToSingleByte(s,@UnicodeToCP874,ErrorMode);
 end;
 
-function UTF8ToKOI8(const s: string): string;
+function UTF8ToKOI8(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8);
+  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8,ErrorMode);
 end;
 
-function UTF8ToKOI8U(const s: string): string;
+function UTF8ToKOI8U(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U);
+  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8U,ErrorMode);
 end;
 
-function UTF8ToKOI8RU(const s: string): string;
+function UTF8ToKOI8RU(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU);
+  Result:=UTF8ToSingleByte(s,@UnicodeToKOI8RU,ErrorMode);
 end;
 
-function UTF8ToMacintosh(const s: string): string;
+function UTF8ToMacintosh(const s: string; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 begin
-  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh);
+  Result:=UTF8ToSingleByte(s,@UnicodeToMacintosh,ErrorMode);
 end;
 {$ENDIF}
 
 function UTF8ToSingleByte(const s: string;
-  const UTF8CharConvFunc: TUnicodeToCharID): string;
+  const UTF8CharConvFunc: TUnicodeToCharID;
+  ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 var
   len: Integer;
   Src: PChar;
@@ -7033,6 +7105,19 @@
       if i>=0 then begin
         Dest^:=chr(i);
         inc(Dest);
+      end
+      else
+      case ErrorMode of
+        eemReplace:
+          begin
+            Dest^:=EncodingReplacementChar;
+            inc(Dest);
+          end;
+        eemException:
+          begin;
+            raise EConvertError.Create(
+              Format('LConvEncoding cannot convert Unicode char %d', [Unicode]));
+          end;
       end;
     end;
   end;
@@ -7286,7 +7371,8 @@
 
 
 function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 var
   ATo: string;
 
@@ -7304,36 +7390,36 @@
   ATo:=NormalizeEncoding(ToEncoding);
 
   if ATo=EncodingUTF8BOM then begin Result:=UTF8ToUTF8BOM(s); exit; end;
-  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
+  if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
   {$IFnDEF DisableAsianCodePages}
-  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
+  if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
   {$ENDIF}
-  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
-  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
+  if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
+  if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif},ErrorMode); exit; end;
   if ATo=EncodingUCS2LE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2LE(s); exit; end;
   if ATo=EncodingUCS2BE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2BE(s); exit; end;
 
   if (ATo=GetDefaultTextEncoding) and Assigned(ConvertUTF8ToAnsi) then begin
-    Result:=ConvertUTF8ToAnsi(s);
+    Result:=ConvertUTF8ToAnsi(s{$ifdef FPC_HAS_CPSTRING},false{$endif},ErrorMode);
     exit;
   end;
 
@@ -7386,7 +7472,8 @@
 end;
 
 function ConvertEncoding(const s, FromEncoding, ToEncoding: string
-  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}): string;
+  {$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}
+  ; ErrorMode: TEncodingErrorMode=eemDefaultValue): string;
 var
   AFrom, ATo, SysEnc : String;
   Encoded : Boolean;
@@ -7414,7 +7501,7 @@
   //DebugLn(['ConvertEncoding ',AFrom,' ',ATo]);
 
   if AFrom=EncodingUTF8 then begin
-    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
+    Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
     if Encoded then exit;
   end
   else
@@ -7426,7 +7513,7 @@
   begin
     Result:=ConvertEncodingToUTF8(s, AFrom, Encoded);
     if Encoded then
-      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
+      Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif}, ErrorMode);
     if Encoded then exit;
   end;
 
c2.diff (44,083 bytes)

Alexey Tor.

2018-05-15 14:25

reporter   ~0108315

c2.diff: added the constant eemDefaultValue, which is the default parameter value (currently "ignore").

Ondrej Pokorny

2018-05-15 15:17

reporter   ~0108316

> default "exception": this will break some apps which don't expect this?

No, not unless you send invalid strings to them. If you do, then yes, the programmers will need to adapt their code to handle invalid strings.

Alexey Tor.

2018-05-16 12:23

reporter   ~0108338

should I change eemDefaultValue to "exception"?

Ondrej Pokorny

2018-05-16 12:27

reporter   ~0108339

No need to send almost identical patches all over again. I just didn't have time to commit it yet. I'll do it in the evening :)

Alexey Tor.

2018-05-16 12:39

reporter   ~0108340

OK; and the next step may be changing eemDefaultValue to "exception".

Alexey Tor.

2018-05-18 20:00

reporter   ~0108417

You wanted to apply it?

Ondrej Pokorny

2018-05-18 20:11

reporter   ~0108418

Yes, and I will apply it as soon as I have the free time to fully review it. Sorry for the delays, but are you really in such a hurry?

Alexey Tor.

2018-05-18 20:26

reporter   ~0108419

I can wait a week or two.

Ondrej Pokorny

2018-05-23 11:24

reporter   ~0108489

Unfortunately, the patch is very trivial and cannot be applied. E.g. the error mode doesn't have any effect for several encodings when UseSystemCPConv is set. Furthermore, I don't like the ErrorMode parameter at all and I don't like the ReplaceChar global variable. It would be better to have several functions:
UTF8ToSingleByte (exception on error), TryUTF8ToSingleByte (returns false on error), UTF8ToSingleByteRepl (char replace on error). The functions should take the target codepage as a parameter (from FPC syscodepages.inc).

All in all LConvEncoding needs a big rewrite to do this properly, for that I don't have the time now.

Alexey Tor.

2018-05-23 12:19

reporter   ~0108491

Even in this trivial state, it can be applied. It improves the code for my app.
The app uses the current LConvEncoding codepages and doesn't use all FPC codepages. For the app this patch is good: it allows raising an exception on a conversion error.

Ondrej Pokorny

2018-05-23 12:55

reporter   ~0108492

I didn't say you must support all FPC codepages. But the patch doesn't even support all LConvEncoding codepages, e.g. CP 1256.

Alexey Tor.

2018-10-02 12:16

reporter  

conv-simple.diff (763 bytes)
Index: components/lazutils/lconvencoding.pas
===================================================================
--- components/lazutils/lconvencoding.pas	(revision 59166)
+++ components/lazutils/lconvencoding.pas	(working copy)
@@ -31,6 +31,9 @@
   SysUtils, Classes, dos, LazUTF8
   {$IFDEF EnableIconvEnc},iconvenc{$ENDIF};
 
+var
+  ConvertEncodingFromUtf8RaisesException: boolean = False;
+
 //encoding names
 const
   EncodingUTF8 = 'utf8';
@@ -7033,7 +7036,10 @@
       if i>=0 then begin
         Dest^:=chr(i);
         inc(Dest);
-      end;
+      end
+      else
+      if ConvertEncodingFromUtf8RaisesException then
+        raise EConvertError.Create('Cannot convert UTF8 to single byte');
     end;
   end;
   SetLength(Result,Dest-PChar(Result));
conv-simple.diff (763 bytes)

Alexey Tor.

2018-10-02 12:20

reporter   ~0111189

Instead of that big patch, I added conv-simple.diff.
It adds one global variable, which my app can use to raise an exception when UTF8 cannot be converted to a single-byte codepage.
Can you apply it?

Juha Manninen

2018-10-31 18:34

developer   ~0111708

"conv-simple" patches function UTF8ToSingleByte in unit LConvEncoding.
Should UTF8ToDBCS in asiancodepagefunctions.inc also be patched?

Ideally, all those functions should raise an exception, without any flags, when the conversion is not possible. The user's code would then take care of handling the error.
That may be too dramatic a change for some use cases, so I will not do it now.

Alexey Tor.

2018-10-31 19:42

reporter  

simple2.diff (1,341 bytes)
Index: components/lazutils/asiancodepagefunctions.inc
===================================================================
--- components/lazutils/asiancodepagefunctions.inc	(revision 59403)
+++ components/lazutils/asiancodepagefunctions.inc	(working copy)
@@ -232,7 +232,10 @@
         else
           Dest^ := chr(i);
         Inc(Dest);
-      end;
+      end
+      else
+      if ConvertEncodingFromUtf8RaisesException then
+        raise EConvertError.Create('Cannot convert UTF8 to DBCS code page');
     end;
   until false;
   //SetLength(Result, Dest - PChar(Result));
Index: components/lazutils/lconvencoding.pas
===================================================================
--- components/lazutils/lconvencoding.pas	(revision 59403)
+++ components/lazutils/lconvencoding.pas	(working copy)
@@ -31,6 +31,9 @@
   SysUtils, Classes, dos, LazUTF8
   {$IFDEF EnableIconvEnc},iconvenc{$ENDIF};
 
+var
+  ConvertEncodingFromUtf8RaisesException: boolean = False;
+
 //encoding names
 const
   EncodingUTF8 = 'utf8';
@@ -7033,7 +7036,10 @@
       if i>=0 then begin
         Dest^:=chr(i);
         inc(Dest);
-      end;
+      end
+      else
+      if ConvertEncodingFromUtf8RaisesException then
+        raise EConvertError.Create('Cannot convert UTF8 to single byte');
     end;
   end;
   SetLength(Result,Dest-PChar(Result));
simple2.diff (1,341 bytes)

Alexey Tor.

2018-10-31 19:43

reporter   ~0111714

Yes, asiancodepagefunctions.inc should be fixed too. Added simple2.diff for it.

"Ideally" - this would change the behaviour of existing user apps, so please don't do it...

Juha Manninen

2018-11-01 09:40

developer   ~0111721

Applied, thanks.

Issue History

Date Modified Username Field Change
2018-05-03 23:12 Alexey Tor. New Issue
2018-05-03 23:16 Alexey Tor. File Added: c1.diff
2018-05-15 00:10 Juha Manninen Note Added: 0108300
2018-05-15 00:46 Alexey Tor. Note Added: 0108301
2018-05-15 00:47 Alexey Tor. Note Edited: 0108301 View Revisions
2018-05-15 01:05 jamie philbrook Note Added: 0108302
2018-05-15 01:06 jamie philbrook Note Edited: 0108302 View Revisions
2018-05-15 08:36 Alexey Tor. Note Added: 0108304
2018-05-15 08:37 Alexey Tor. Note Added: 0108305
2018-05-15 13:01 Ondrej Pokorny Assigned To => Ondrej Pokorny
2018-05-15 13:01 Ondrej Pokorny Status new => assigned
2018-05-15 13:30 Ondrej Pokorny Note Added: 0108311
2018-05-15 13:50 Alexey Tor. Note Added: 0108313
2018-05-15 14:24 Alexey Tor. File Added: c2.diff
2018-05-15 14:25 Alexey Tor. Note Added: 0108315
2018-05-15 15:17 Ondrej Pokorny Note Added: 0108316
2018-05-16 12:23 Alexey Tor. Note Added: 0108338
2018-05-16 12:27 Ondrej Pokorny Note Added: 0108339
2018-05-16 12:39 Alexey Tor. Note Added: 0108340
2018-05-18 20:00 Alexey Tor. Note Added: 0108417
2018-05-18 20:11 Ondrej Pokorny Note Added: 0108418
2018-05-18 20:26 Alexey Tor. Note Added: 0108419
2018-05-23 11:24 Ondrej Pokorny Note Added: 0108489
2018-05-23 12:19 Alexey Tor. Note Added: 0108491
2018-05-23 12:55 Ondrej Pokorny Note Added: 0108492
2018-10-02 12:16 Alexey Tor. File Added: conv-simple.diff
2018-10-02 12:20 Alexey Tor. Note Added: 0111189
2018-10-31 18:22 Juha Manninen Assigned To Ondrej Pokorny => Juha Manninen
2018-10-31 18:34 Juha Manninen LazTarget => -
2018-10-31 18:34 Juha Manninen Note Added: 0111708
2018-10-31 18:34 Juha Manninen Status assigned => feedback
2018-10-31 19:42 Alexey Tor. File Added: simple2.diff
2018-10-31 19:43 Alexey Tor. Note Added: 0111714
2018-10-31 19:43 Alexey Tor. Status feedback => assigned
2018-11-01 09:40 Juha Manninen Fixed in Revision => r59416
2018-11-01 09:40 Juha Manninen Note Added: 0111721
2018-11-01 09:40 Juha Manninen Status assigned => resolved
2018-11-01 09:40 Juha Manninen Resolution open => fixed