View Issue Details

IDProjectCategoryView StatusLast Update
0036622FPCCompilerpublic2020-02-03 15:38
ReporterJ. Gareth MoretonAssigned ToFlorian 
PrioritylowSeveritytweakReproducibilityN/A
Status resolvedResolutionfixed 
Platformi386 and x86_64OSMicrosoft WindowsOS Version10 Professional
Product Version3.3.1Product Buildr44021 
Target VersionFixed in Version3.3.1 
Summary0036622: [Patch] x86 SUB and LEA optimisations
DescriptionThis patch breaks some dependency chains in compiled code, as well as simplifying it in some cases:

- movl/q %reg1,%reg2; addl/q $x,%reg2 -> leal/q x(%reg1),%reg2 (so long as a conditional statement doesn't follow)

- movl/q %reg1,%reg2; subl/q $x,%reg2 -> leal/q -x(%reg1),%reg2 (so long as a conditional statement doesn't follow)

- subl/q $x,%reg1; movl/q %reg1,%reg2 -> leal/q -x(%reg1),%reg2; subl/q %x,%reg1 (won't perform when optimising for size)
Steps To ReproduceApply patch and confirm correct (and improved) compilation in i386 and x86_64 platforms
Additional InformationThe last one is notable in that it sometimes permits the removal of a cmp instruction if one immediately follows.
Tagscompiler, i386, optimizations, patch, x86, x86_64
Fixed in Revision44030
FPCOldBugId
FPCTarget-
Attached Files
  • sub-lea-optimisation.patch (5,472 bytes)
    Index: compiler/i386/aoptcpu.pas
    ===================================================================
    --- compiler/i386/aoptcpu.pas	(revision 44021)
    +++ compiler/i386/aoptcpu.pas	(working copy)
    @@ -242,6 +242,8 @@
                       Result:=OptPass2Jmp(p);
                     A_MOV:
                       Result:=OptPass2MOV(p);
    +                A_SUB:
    +                  Result:=OptPass2SUB(p);
                     else
                       ;
                   end;
    Index: compiler/x86/aoptx86.pas
    ===================================================================
    --- compiler/x86/aoptx86.pas	(revision 44021)
    +++ compiler/x86/aoptx86.pas	(working copy)
    @@ -82,6 +82,7 @@
             function OptPass2Jmp(var p : tai) : boolean;
             function OptPass2Jcc(var p : tai) : boolean;
             function OptPass2Lea(var p: tai): Boolean;
    +        function OptPass2SUB(var p: tai): Boolean;
     
             function PostPeepholeOptMov(var p : tai) : Boolean;
     {$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    @@ -3365,6 +3366,7 @@
            end;
     
           var
    +        NewRef: TReference;
            hp1,hp2,hp3: tai;
     {$ifndef x86_64}
            hp4: tai;
    @@ -3393,6 +3395,48 @@
                   to the MOV instruction on this pass }
               end
             else if MatchOpType(taicpu(p),top_reg,top_reg) and
    +          (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
    +          MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
    +          MatchOpType(taicpu(hp1),top_const,top_reg) and
    +          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
    +          begin
    +            { Change:
    +                movl/q %reg1,%reg2      movl/q %reg1,%reg2
    +                addl/q $x,%reg2         subl/q $x,%reg2
    +              To:
    +                leal/q x(%reg1),%reg2   leal/q -x(%reg1),%reg2
    +            }
    +            if not GetNextInstruction(hp1, hp2) or
    +              { The FLAGS register isn't always tracked properly, so do not
    +                perform this optimisation if a conditional statement follows }
    +              not MatchInstruction(hp2, [A_Jcc, A_SETcc, A_CMOVcc], []) then
    +              begin
    +                reference_reset(NewRef, 1, []);
    +                NewRef.base := taicpu(p).oper[0]^.reg;
    +                NewRef.scalefactor := 1;
    +
    +                if taicpu(hp1).opcode = A_ADD then
    +                  begin
    +                    DebugMsg(SPeepholeOptimization + 'MovAdd2Lea', p);
    +                    NewRef.offset := taicpu(hp1).oper[0]^.val;
    +                  end
    +                else
    +                  begin
    +                    DebugMsg(SPeepholeOptimization + 'MovSub2Lea', p);
    +                    NewRef.offset := -taicpu(hp1).oper[0]^.val;
    +                  end;
    +
    +                taicpu(p).opcode := A_LEA;
    +                taicpu(p).loadref(0, NewRef);
    +
    +                Asml.Remove(hp1);
    +                hp1.Free;
    +
    +                Result := True;
    +                Exit;
    +              end;
    +          end
    +        else if MatchOpType(taicpu(p),top_reg,top_reg) and
     {$ifdef x86_64}
               MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
     {$else x86_64}
    @@ -4897,6 +4941,50 @@
           end;
     
     
    +    function TX86AsmOptimizer.OptPass2SUB(var p: tai): Boolean;
    +      var
    +        hp1, hp2: tai; NewRef: TReference;
    +      begin
    +        { Change:
    +            subl/q $x,%reg1
    +            movl/q %reg1,%reg2
    +          To:
    +            leal/q $-x(%reg1),%reg2
    +            subl/q $x,%reg1
    +
    +          Breaks the dependency chain and potentially permits the removal of
    +          a CMP instruction if one follows.
    +        }
    +        Result := False;
    +        if not (cs_opt_size in current_settings.optimizerswitches) and
    +          (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
    +          MatchOpType(taicpu(p),top_const,top_reg) and
    +          GetNextInstruction(p, hp1) and
    +          MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
    +          (taicpu(hp1).oper[1]^.typ = top_reg) and
    +          MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) then
    +          begin
    +            { Change the MOV instruction to a LEA instruction, and update the
    +              first operand }
    +            reference_reset(NewRef, 1, []);
    +            NewRef.base := taicpu(p).oper[1]^.reg;
    +            NewRef.scalefactor := 1;
    +            NewRef.offset := -taicpu(p).oper[0]^.val;
    +
    +            taicpu(hp1).opcode := A_LEA;
    +            taicpu(hp1).loadref(0, NewRef);
    +
    +            { Move what is now the LEA instruction to before the SUB instruction }
    +            Asml.Remove(hp1);
    +            Asml.InsertBefore(hp1, p);
    +            AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
    +
    +            DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
    +            Result := True;
    +          end;
    +      end;
    +
    +
         function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
     
           function SkipSimpleInstructions(var hp1 : tai) : Boolean;
    Index: compiler/x86_64/aoptcpu.pas
    ===================================================================
    --- compiler/x86_64/aoptcpu.pas	(revision 44021)
    +++ compiler/x86_64/aoptcpu.pas	(working copy)
    @@ -154,6 +154,8 @@
                       Result:=OptPass2Jcc(p);
                     A_Lea:
                       Result:=OptPass2Lea(p);
    +                A_SUB:
    +                  Result:=OptPass2SUB(p);
                     else
                       ;
                   end;
    

Relationships

parent of 0036630 closedFlorian Win32 lazarus IDE raise Range check error exception on runtime. 

Activities

J. Gareth Moreton

2020-01-24 09:17

developer  

sub-lea-optimisation.patch (5,472 bytes)
Index: compiler/i386/aoptcpu.pas
===================================================================
--- compiler/i386/aoptcpu.pas	(revision 44021)
+++ compiler/i386/aoptcpu.pas	(working copy)
@@ -242,6 +242,8 @@
                   Result:=OptPass2Jmp(p);
                 A_MOV:
                   Result:=OptPass2MOV(p);
+                A_SUB:
+                  Result:=OptPass2SUB(p);
                 else
                   ;
               end;
Index: compiler/x86/aoptx86.pas
===================================================================
--- compiler/x86/aoptx86.pas	(revision 44021)
+++ compiler/x86/aoptx86.pas	(working copy)
@@ -82,6 +82,7 @@
         function OptPass2Jmp(var p : tai) : boolean;
         function OptPass2Jcc(var p : tai) : boolean;
         function OptPass2Lea(var p: tai): Boolean;
+        function OptPass2SUB(var p: tai): Boolean;
 
         function PostPeepholeOptMov(var p : tai) : Boolean;
 {$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
@@ -3365,6 +3366,7 @@
        end;
 
       var
+        NewRef: TReference;
        hp1,hp2,hp3: tai;
 {$ifndef x86_64}
        hp4: tai;
@@ -3393,6 +3395,48 @@
               to the MOV instruction on this pass }
           end
         else if MatchOpType(taicpu(p),top_reg,top_reg) and
+          (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
+          MatchInstruction(hp1,A_ADD,A_SUB,[taicpu(p).opsize]) and
+          MatchOpType(taicpu(hp1),top_const,top_reg) and
+          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+          begin
+            { Change:
+                movl/q %reg1,%reg2      movl/q %reg1,%reg2
+                addl/q $x,%reg2         subl/q $x,%reg2
+              To:
+                leal/q x(%reg1),%reg2   leal/q -x(%reg1),%reg2
+            }
+            if not GetNextInstruction(hp1, hp2) or
+              { The FLAGS register isn't always tracked properly, so do not
+                perform this optimisation if a conditional statement follows }
+              not MatchInstruction(hp2, [A_Jcc, A_SETcc, A_CMOVcc], []) then
+              begin
+                reference_reset(NewRef, 1, []);
+                NewRef.base := taicpu(p).oper[0]^.reg;
+                NewRef.scalefactor := 1;
+
+                if taicpu(hp1).opcode = A_ADD then
+                  begin
+                    DebugMsg(SPeepholeOptimization + 'MovAdd2Lea', p);
+                    NewRef.offset := taicpu(hp1).oper[0]^.val;
+                  end
+                else
+                  begin
+                    DebugMsg(SPeepholeOptimization + 'MovSub2Lea', p);
+                    NewRef.offset := -taicpu(hp1).oper[0]^.val;
+                  end;
+
+                taicpu(p).opcode := A_LEA;
+                taicpu(p).loadref(0, NewRef);
+
+                Asml.Remove(hp1);
+                hp1.Free;
+
+                Result := True;
+                Exit;
+              end;
+          end
+        else if MatchOpType(taicpu(p),top_reg,top_reg) and
 {$ifdef x86_64}
           MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
 {$else x86_64}
@@ -4897,6 +4941,50 @@
       end;
 
 
+    function TX86AsmOptimizer.OptPass2SUB(var p: tai): Boolean;
+      var
+        hp1, hp2: tai; NewRef: TReference;
+      begin
+        { Change:
+            subl/q $x,%reg1
+            movl/q %reg1,%reg2
+          To:
+            leal/q $-x(%reg1),%reg2
+            subl/q $x,%reg1
+
+          Breaks the dependency chain and potentially permits the removal of
+          a CMP instruction if one follows.
+        }
+        Result := False;
+        if not (cs_opt_size in current_settings.optimizerswitches) and
+          (taicpu(p).opsize in [S_L{$ifdef x86_64}, S_Q{$endif x86_64}]) and
+          MatchOpType(taicpu(p),top_const,top_reg) and
+          GetNextInstruction(p, hp1) and
+          MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
+          (taicpu(hp1).oper[1]^.typ = top_reg) and
+          MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) then
+          begin
+            { Change the MOV instruction to a LEA instruction, and update the
+              first operand }
+            reference_reset(NewRef, 1, []);
+            NewRef.base := taicpu(p).oper[1]^.reg;
+            NewRef.scalefactor := 1;
+            NewRef.offset := -taicpu(p).oper[0]^.val;
+
+            taicpu(hp1).opcode := A_LEA;
+            taicpu(hp1).loadref(0, NewRef);
+
+            { Move what is now the LEA instruction to before the SUB instruction }
+            Asml.Remove(hp1);
+            Asml.InsertBefore(hp1, p);
+            AllocRegBetween(taicpu(hp1).oper[1]^.reg, hp1, p, UsedRegs);
+
+            DebugMsg(SPeepholeOptimization + 'SubMov2LeaSub', p);
+            Result := True;
+          end;
+      end;
+
+
     function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
 
       function SkipSimpleInstructions(var hp1 : tai) : Boolean;
Index: compiler/x86_64/aoptcpu.pas
===================================================================
--- compiler/x86_64/aoptcpu.pas	(revision 44021)
+++ compiler/x86_64/aoptcpu.pas	(working copy)
@@ -154,6 +154,8 @@
                   Result:=OptPass2Jcc(p);
                 A_Lea:
                   Result:=OptPass2Lea(p);
+                A_SUB:
+                  Result:=OptPass2SUB(p);
                 else
                   ;
               end;

Florian

2020-01-24 22:10

administrator   ~0120722

Thanks, applied.

Issue History

Date Modified Username Field Change
2020-01-24 09:17 J. Gareth Moreton New Issue
2020-01-24 09:17 J. Gareth Moreton File Added: sub-lea-optimisation.patch
2020-01-24 09:17 J. Gareth Moreton Tag Attached: patch
2020-01-24 09:17 J. Gareth Moreton Tag Attached: compiler
2020-01-24 09:17 J. Gareth Moreton Tag Attached: optimizations
2020-01-24 09:17 J. Gareth Moreton Tag Attached: x86_64
2020-01-24 09:17 J. Gareth Moreton Tag Attached: x86
2020-01-24 09:17 J. Gareth Moreton Tag Attached: i386
2020-01-24 09:18 J. Gareth Moreton Priority normal => low
2020-01-24 09:18 J. Gareth Moreton Severity minor => tweak
2020-01-24 09:18 J. Gareth Moreton FPCTarget => -
2020-01-24 22:10 Florian Assigned To => Florian
2020-01-24 22:10 Florian Status new => resolved
2020-01-24 22:10 Florian Resolution open => fixed
2020-01-24 22:10 Florian Fixed in Version => 3.3.1
2020-01-24 22:10 Florian Fixed in Revision => 44030
2020-01-24 22:10 Florian Note Added: 0120722
2020-02-03 15:37 J. Gareth Moreton Status resolved => feedback
2020-02-03 15:37 J. Gareth Moreton Resolution fixed => reopened
2020-02-03 15:38 J. Gareth Moreton Relationship added parent of 0036630
2020-02-03 15:38 J. Gareth Moreton Status feedback => resolved
2020-02-03 15:38 J. Gareth Moreton Resolution reopened => fixed