View Issue Details

IDProjectCategoryView StatusLast Update
0036670FPCCompilerpublic2020-02-13 21:54
ReporterJ. Gareth Moreton Assigned ToFlorian  
PrioritylowSeveritytweakReproducibilityN/A
Status resolvedResolutionfixed 
Product Version3.3.1 
Fixed in Version3.3.1 
Summary0036670: [Patch / Refactor] x86 "OptPass1MOV" improvements - Part 2½
DescriptionThis patch refactors OptPass1MOV to remove optimisations that are no longer necessary or even executed thanks to the new DeepMOVOpt feature. Compiled source code should be no different. See Additional Information for a more detailed list of removed optimisations.
Steps To ReproduceApply patch and confirm identical compilation of unrelated source files.
Additional InformationRemoved optimisations:

- MovMov2Mov 2
- MovMov2Mov 4
- MovMov2Mov 6c
- Mov2Nop 2
- MovTest/Cmp/Or/AndJxx2Test/Cmp/Or/AndJxx*
- MovTest/Cmp/Or/AndJxx2MovTest/Cmp/Or/AndJxx*
- MovMovXX2MoVXX 1 (part of OptPass2MOV rather than OptPass1MOV)

* Though these optimisations were still executed, DeepMOVOpt performs equivalent optimisations
Tagscompiler, i386, optimization, patch, refactor, x86, x86_64
Fixed in Revision44164, 44166
FPCOldBugId
FPCTarget-
Attached Files

Activities

J. Gareth Moreton

2020-02-07 00:28

developer  

mov-redundant-strip.patch (7,733 bytes)   
Index: compiler/x86/aoptx86.pas
===================================================================
--- compiler/x86/aoptx86.pas	(revision 44106)
+++ compiler/x86/aoptx86.pas	(working copy)
@@ -2030,25 +2030,7 @@
 
                       with %treg is not used after }
                     case taicpu(p).oper[0]^.typ Of
-                      top_reg:
-                        begin
-                          { change
-                              mov %reg, %treg
-                              mov %treg, y
-
-                              to
-
-                              mov %reg, y
-                          }
-                          if taicpu(hp1).oper[1]^.typ=top_reg then
-                            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
-                          taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
-                          DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
-                          asml.remove(hp1);
-                          hp1.free;
-                          Result:=true;
-                          Exit;
-                        end;
+                      { top_reg is covered by DeepMOVOpt }
                       top_const:
                         begin
                           { change
@@ -2091,43 +2073,26 @@
                             Exit;
                           end;
                       else
-                        { Do nothing };
+                        ;
                     end
                   else
-                    { %treg is used afterwards }
-
-                    case taicpu(p).oper[0]^.typ of
-                      top_const:
-                        if
-                          (
-                            not (cs_opt_size in current_settings.optimizerswitches) or
-                            (taicpu(hp1).opsize = S_B)
-                          ) and
-                          (
-                            (taicpu(hp1).oper[1]^.typ = top_reg) or
-                            ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
-                          ) then
-                          begin
-                            DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
-                            taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
-                          end;
-                      top_reg:
-                        begin
-                          DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = ' + debug_regname(taicpu(p).oper[0]^.reg) + '; changed to minimise pipeline stall (MovMov2Mov 6c)',hp1);
-                          AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
-                          if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
-                            begin
-                              DebugMsg(SPeepholeOptimization + 'Mov2Nop 2 done',hp1);
-                              asml.remove(hp1);
-                              hp1.free;
-                              Result := True;
-                              Exit;
-                            end;
-                          taicpu(hp1).loadreg(0, taicpu(p).oper[0]^.reg);
-                        end;
-                      else
-                        { Do nothing };
-                    end;
+                    { %treg is used afterwards, but all eventualities
+                      other than the first MOV instruction being a constant
+                      are covered by DeepMOVOpt, so only check for that }
+                    if (taicpu(p).oper[0]^.typ = top_const) and
+                      (
+                        { For MOV operations, a size saving is only made if the register/const is byte-sized }
+                        not (cs_opt_size in current_settings.optimizerswitches) or
+                        (taicpu(hp1).opsize = S_B)
+                      ) and
+                      (
+                        (taicpu(hp1).oper[1]^.typ = top_reg) or
+                        ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
+                      ) then
+                      begin
+                        DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
+                        taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
+                      end;
               end;
             if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
                (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
@@ -2473,79 +2438,7 @@
                   Internalerror(2019103001);
               end;
           end;
-        { Change
-           mov %reg1, %reg2
-           xxx %reg2, ???
 
-           to
-
-           mov %reg1, %reg2
-           xxx %reg1, ???
-
-           to avoid a write/read penalty
-        }
-        if MatchOpType(taicpu(p),top_reg,top_reg) and
-           ((MatchInstruction(hp1,A_OR,A_AND,A_TEST,[]) and
-            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
-            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^)) or
-            (MatchInstruction(hp1,A_CMP,[]) and
-             MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
-             MatchOpType(taicpu(hp1),top_const,top_reg)
-            )
-           ) then
-           {  we have
-
-              mov %reg1, %reg2
-              test/or/and %reg2, %reg2
-           }
-           begin
-             TransferUsedRegs(TmpUsedRegs);
-             UpdateUsedRegs(TmpUsedRegs, tai(p.next));
-             { reg1 will be used after the first instruction,
-               so update the allocation info                  }
-             AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
-             if GetNextInstruction(hp1, hp2) and
-                (hp2.typ = ait_instruction) and
-                taicpu(hp2).is_jmp and
-                not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
-                 { change
-
-                   mov %reg1, %reg2
-                   test/or/and %reg2, %reg2
-                   jxx
-
-                   to
-
-                   test %reg1, %reg1
-                   jxx
-                 }
-                 begin
-                   if taicpu(hp1).opcode<>A_CMP then
-                     taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
-                   taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
-                   DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2Test/Cmp/Or/AndJxx done',p);
-                   RemoveCurrentP(p);
-                   Exit;
-                 end
-               else
-                 { change
-
-                   mov %reg1, %reg2
-                   test/or/and %reg2, %reg2
-
-                   to
-
-                   mov %reg1, %reg2
-                   test/or/and %reg1, %reg1
-
-                   }
-                 begin
-                   if taicpu(hp1).opcode<>A_CMP then
-                     taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
-                   taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
-                   DebugMsg(SPeepholeOptimization + 'MovTest/Cmp/Or/AndJxx2MovTest/Cmp/Or/AndJxx done',p);
-                 end;
-           end;
         { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
           x >= RetOffset) as it doesn't do anything (it writes either to a
           parameter or to the temporary storage room for the function
mov-redundant-strip.patch (7,733 bytes)   

J. Gareth Moreton

2020-02-10 00:27

developer   ~0120985

Last edited: 2020-02-10 01:23

View 2 revisions

Full win32 and win64 regression suite with combination of patches from 0036669, 36670, 0036675 and 0036680 was successful.

(Never mind - had a script error - re-running tests)

J. Gareth Moreton

2020-02-10 00:30

developer   ~0120987

Additional patch that removes an optimisation from OptPass2MOV ('MovMovXX2MoVXX 1') that is no longer executed for the same reasons as above... DeepMOVOpt does all the work!
mov2-redundant-strip.patch (1,642 bytes)   
Index: compiler/x86/aoptx86.pas
===================================================================
--- compiler/x86/aoptx86.pas	(revision 44140)
+++ compiler/x86/aoptx86.pas	(working copy)
@@ -4012,32 +4012,6 @@
               end;
           end
         else if MatchOpType(taicpu(p),top_reg,top_reg) and
-{$ifdef x86_64}
-          MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
-{$else x86_64}
-          MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
-{$endif x86_64}
-          MatchOpType(taicpu(hp1),top_ref,top_reg) and
-          ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
-           or
-           (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
-            ) and
-          (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
-          { mov reg1, reg2
-            mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
-          begin
-            if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
-              taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
-            if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
-              taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
-            DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
-            asml.remove(p);
-            p.free;
-            p := hp1;
-            Result:=true;
-            exit;
-          end
-        else if MatchOpType(taicpu(p),top_reg,top_reg) and
           MatchInstruction(hp1, A_SAR, []) then
           begin
             if MatchOperand(taicpu(hp1).oper[0]^, 31) then
mov2-redundant-strip.patch (1,642 bytes)   

J. Gareth Moreton

2020-02-10 09:37

developer   ~0120992

Test re-run seems to pass.

Florian

2020-02-13 21:54

administrator   ~0121090

Thanks, both applied.

Issue History

Date Modified Username Field Change
2020-02-07 00:28 J. Gareth Moreton New Issue
2020-02-07 00:28 J. Gareth Moreton File Added: mov-redundant-strip.patch
2020-02-07 00:28 J. Gareth Moreton Tag Attached: compiler
2020-02-07 00:28 J. Gareth Moreton Tag Attached: patch
2020-02-07 00:28 J. Gareth Moreton Tag Attached: i386
2020-02-07 00:28 J. Gareth Moreton Tag Attached: x86_64
2020-02-07 00:28 J. Gareth Moreton Tag Attached: optimization
2020-02-07 00:28 J. Gareth Moreton Tag Attached: refactor
2020-02-07 00:28 J. Gareth Moreton Priority normal => low
2020-02-07 00:28 J. Gareth Moreton Severity minor => tweak
2020-02-07 00:28 J. Gareth Moreton FPCTarget => -
2020-02-07 15:32 J. Gareth Moreton Tag Attached: x86
2020-02-08 07:53 J. Gareth Moreton Summary [Patch] x86 "OptPass1MOV" improvements - Part 2½ => [Patch / Refactor] x86 "OptPass1MOV" improvements - Part 2½
2020-02-10 00:27 J. Gareth Moreton Note Added: 0120985
2020-02-10 00:30 J. Gareth Moreton File Added: mov2-redundant-strip.patch
2020-02-10 00:30 J. Gareth Moreton Note Added: 0120987
2020-02-10 01:23 J. Gareth Moreton Note Edited: 0120985 View Revisions
2020-02-10 09:37 J. Gareth Moreton Note Added: 0120992
2020-02-10 18:36 J. Gareth Moreton Additional Information Updated View Revisions
2020-02-13 21:54 Florian Assigned To => Florian
2020-02-13 21:54 Florian Status new => resolved
2020-02-13 21:54 Florian Resolution open => fixed
2020-02-13 21:54 Florian Fixed in Version => 3.3.1
2020-02-13 21:54 Florian Fixed in Revision => 44164, 44166
2020-02-13 21:54 Florian Note Added: 0121090