optimization idea: keep local record fields in registers
Original Reporter info from Mantis: BeniBela @benibela
- Reporter name: Benito van der Zander
Description:
The fields of a local record should be treated like ordinary local variables (i.e. be eligible to be kept in registers) when nothing takes the address of the record.
Steps to reproduce:
Compare these two functions, which compute the same result:
// Baseline case: sums the integers 1..10 using two plain local variables.
// Returns the accumulated sum s (55).
function foo: integer;
var i, s: integer;
begin
s := 0;
i := 1;
while i <= 10 do begin
s := s + i;
inc(i);
end;
result := s;
end;
// Record case: the same loop as foo (sum of 1..10), but the counter i and the
// accumulator s are fields of the local record w instead of plain locals.
// Nothing in this function takes the address of w.
function foo2: integer;
var w: record
i, s: integer;
end;
begin
// Inside the with-block, i and s refer to the fields w.i and w.s.
with w do begin
s := 0;
i := 1;
while i <= 10 do begin
s := s + i;
inc(i);
end;
result := s;
end;
end;
But the record case makes a lot of useless memory accesses:
amd64:
foo:
project1.lpr:15 s := 0;
0000000000401090 31c0 xor %eax,%eax
project1.lpr:16 i := 1;
0000000000401092 ba01000000 mov $0x1,%edx
project1.lpr:17 while i <= 10 do begin
0000000000401097 eb0c jmp 0x4010a5 <FOO+21>
0000000000401099 0f1f8000000000 nopl 0x0(%rax)
project1.lpr:18 s := s + i;
00000000004010A0 01d0 add %edx,%eax
project1.lpr:19 inc(i);
00000000004010A2 83c201 add $0x1,%edx
project1.lpr:17 while i <= 10 do begin
00000000004010A5 83fa0a cmp $0xa,%edx
00000000004010A8 7ef6 jle 0x4010a0 <FOO+16>
project1.lpr:22 end;
00000000004010AA c3 retq
foo2:
00000000004010B0 488d6424f8 lea -0x8(%rsp),%rsp
project1.lpr:30 s := 0;
00000000004010B5 c744240400000000 movl $0x0,0x4(%rsp)
project1.lpr:31 i := 1;
00000000004010BD c7042401000000 movl $0x1,(%rsp)
project1.lpr:32 while i <= 10 do begin
00000000004010C4 eb15 jmp 0x4010db <FOO2+43>
00000000004010C6 66660f1f840000000000 data16 nopw 0x0(%rax,%rax,1)
project1.lpr:33 s := s + i;
00000000004010D0 8b0424 mov (%rsp),%eax
00000000004010D3 01442404 add %eax,0x4(%rsp)
project1.lpr:34 inc(i);
00000000004010D7 83042401 addl $0x1,(%rsp)
project1.lpr:32 while i <= 10 do begin
00000000004010DB 833c240a cmpl $0xa,(%rsp)
00000000004010DF 7eef jle 0x4010d0 <FOO2+32>
project1.lpr:36 result := s;
00000000004010E1 8b442404 mov 0x4(%rsp),%eax
project1.lpr:38 end;
00000000004010E5 488d642408 lea 0x8(%rsp),%rsp
00000000004010EA c3 retq
arm (where it is probably worse with smaller caches and more instructions):
Dump of assembler code for function FOO:
15 s := 0;
0x00008150 <+0>: mov r0, #0
16 i := 1;
0x00008154 <+4>: mov r1, #1
17 while i <= 10 do begin
0x00008158 <+8>: b 0x8164 <FOO+20>
18 s := s + i;
0x0000815c <+12>: add r0, r0, r1
19 inc(i);
0x00008160 <+16>: add r1, r1, #1
0x00008164 <+20>: cmp r1, #10
0x00008168 <+24>: ble 0x815c <FOO+12>
20 end;
21 result := s;
22 end;
0x0000816c <+28>: mov pc, lr
Dump of assembler code for function FOO2:
28 begin
0x00008170 <+0>: sub sp, sp, #8
29 with w do begin
30 s := 0;
0x00008174 <+4>: mov r0, #0
0x00008178 <+8>: str r0, [sp, #4]
31 i := 1;
0x0000817c <+12>: mov r0, #1
0x00008180 <+16>: str r0, [sp]
32 while i <= 10 do begin
0x00008184 <+20>: b 0x81a4 <FOO2+52>
33 s := s + i;
0x00008188 <+24>: ldr r0, [sp]
0x0000818c <+28>: ldr r1, [sp, #4]
0x00008190 <+32>: add r1, r1, r0
34 inc(i);
0x00008194 <+36>: ldr r0, [sp]
0x00008198 <+40>: str r1, [sp, #4]
0x0000819c <+44>: add r0, r0, #1
0x000081a0 <+48>: str r0, [sp]
0x000081a4 <+52>: ldr r0, [sp]
0x000081a8 <+56>: cmp r0, #10
0x000081ac <+60>: ble 0x8188 <FOO2+24>
35 end;
36 result := s;
0x000081b0 <+64>: ldr r0, [sp, #4]
37 end;
38 end;
0x000081b4 <+68>: add sp, sp, #8
0x000081b8 <+72>: mov pc, lr
0x000081bc <+76>: andeq r0, r0, r0
Additional information:
This is especially important when the record is an inlined enumerator.
Mantis conversion info:
- Mantis ID: 34915
- Build: r40721
- Version: 3.3.1