View Issue Details

IDProjectCategoryView StatusLast Update
0035691FPCRTLpublic2019-07-26 21:27
ReporterChristo CrauseAssigned ToJeppe Johansen 
PrioritynormalSeverityminorReproducibilityN/A
Status closedResolutionfixed 
Product Version3.3.1Product Build42190 
Target VersionFixed in Version3.3.1 
Summary0035691: AVR [patch] div, mod and shift operations for 64 bit math
DescriptionThe fall-back pascal code for 64 bit math in rtl generates a lot of code. Example below in Steps to Reproduce generates 4816 bytes of code and uses 312 bytes of data using the trunk compiler and rtl.

Applying the attached patch reduces the program to 2168 bytes of code and 50 bytes of data.

Note 1. The div/mod routines are based on the same algorithm as the byte/word/dword versions in math.inc. I also included the div by zero check.

Note 2. The shift routines doesn't preserve SREG because I want the code as small as possible. If SREG should be preserved I can add this.
Steps To Reproduceprogram intmathtest;
var
  ans, x, d: int64;
begin
  x := -1;
  d := 0;
  ans := (x div d) shl 7;
end.

Compile with:
 ~/fpc/installs/lib/fpc/3.3.1/ppcrossavr -Tembedded -Pavr -O2 -vewnhi -Wpatmega328p -XPavr- intmathtest.pp
TagsAVR
Fixed in Revision42495
FPCOldBugId
FPCTarget-
Attached Files
  • int64.patch (12,023 bytes)
    Index: rtl/avr/int64p.inc
    ===================================================================
    --- rtl/avr/int64p.inc	(revision 42190)
    +++ rtl/avr/int64p.inc	(working copy)
    @@ -12,3 +12,577 @@
         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     
      **********************************************************************}
    +
    +{$define FPC_SYSTEM_HAS_SHR_QWORD}
    +// Simplistic version with checking if whole bytes can be shifted
    +// Doesn't change bitshift portion even if possible because of byteshift
    +// Shorter code but not shortest execution time version
    +function fpc_shr_qword(value: qword; shift: sizeint): qword; assembler; nostackframe;
    +  [public, alias: 'FPC_SHR_QWORD']; compilerproc;
    +label
    +  byteshift, bitshift, finish;
    +asm
    +// value passed in R25...R18
    +// shift passed in R16
    +// return value in R25...R18
    +
    +  push R16
    +
    +  andi R16, 63    // mask 64 bit relevant value per generic routine
    +byteshift:
    +  breq finish     // shift = 0, finished
    +  cpi R16, 8      // Check if shift is at least a byte
    +  brlo bitshift
    +  mov R18, R19    // if so, then shift all bytes right by 1 position
    +  mov R19, R20
    +  mov R20, R21
    +  mov R21, R22
    +  mov R22, R23
    +  mov R23, R24
    +  mov R24, R25
    +  clr R25         // and clear the high byte
    +  subi R16, 8     // subtract 8 bits from shift
    +  rjmp byteshift  // check if another byte can be shifted
    +
    +bitshift:         // shift all 8 bytes right by 1 bit
    +  lsr R25
    +  ror R24
    +  ror R23
    +  ror R22
    +  ror R21
    +  ror R20
    +  ror R19
    +  ror R18
    +  dec R16
    +  brne bitshift   // until R16 = 0
    +
    +finish:
    +  pop R16
    +end;
    +function fpc_shr_qword(value: qword; shift: sizeint): qword; external name 'FPC_SHR_QWORD';
    +
    +{$define FPC_SYSTEM_HAS_SHL_QWORD}
    +function fpc_shl_qword(value: qword; shift: sizeint): qword; assembler; nostackframe;
    +[public, alias: 'FPC_SHL_QWORD']; compilerproc;
    +label
    +  byteshift, bitshift, finish;
    +asm
    +// value passed in R25...R18
    +// shift passed in R16
    +// return value in R25...R18
    +  push R16
    +
    +  andi R16, 63    // mask 64 bit relevant value per generic routine
    +byteshift:
    +  breq finish     // shift = 0, finished
    +  cpi R16, 8      // Check if shift is at least a byte
    +  brlo bitshift
    +  mov R25, R24    // if so, then shift all bytes left by 1 position
    +  mov R24, R23
    +  mov R23, R22
    +  mov R22, R21
    +  mov R21, R20
    +  mov R20, R19
    +  mov R19, R18
    +  clr R18         // and clear the high byte
    +  subi R16, 8     // subtract 8 bits from shift
    +  rjmp byteshift  // check if another byte can be shifted
    +
    +bitshift:         // shift all 8 bytes left by 1 bit
    +  lsl R18
    +  rol R19
    +  rol R20
    +  rol R21
    +  rol R22
    +  rol R23
    +  rol R24
    +  rol R25
    +
    +  dec R16
    +  brne bitshift   // until R16 = 0
    +
    +finish:
    +  pop R16
    +end;
    +
    +function fpc_shl_qword(value: qword; shift: sizeint): qword; external name 'FPC_SHL_QWORD';
    +
    +{$define FPC_SYSTEM_HAS_SHL_INT64}
    +function fpc_shl_int64(value: int64; shift: sizeint): int64;
    +  [public, alias: 'FPC_SHL_INT64']; compilerproc; inline;
    +begin
    +  Result := fpc_shl_qword(qword(value), shift);
    +end;
    +
    +{$define FPC_SYSTEM_HAS_SHR_INT64}
    +// shr of signed int is same as shr of unsigned int (logical shift right)
    +function fpc_shr_int64(value: int64; shift: sizeint): int64; [public, alias: 'FPC_SHR_INT64']; compilerproc;
    +begin
    +  Result := fpc_shr_qword(qword(value), shift);
    +end;
    +
    +{$define FPC_SYSTEM_HAS_DIV_QWORD}
    +function fpc_div_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_DIV_QWORD']; compilerproc;
    +label
    +  start, div1, div2, div3, finish;
    +asm
    +// Symbol  Name        Register(s)
    +// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
    +// i       counter     R26
    +//         1           R27
    +
    +  cp R25, R1
    +  cpc R24, R1
    +  cpc R23, R1
    +  cpc R22, R1
    +  cpc R21, R1
    +  cpc R20, R1
    +  cpc R19, R1
    +  cpc R18, R1
    +
    +  brne .LNonZero
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_pc_addr
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_pc_addr
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R20, R24
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_frame
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_frame
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R18, R24
    +  ldi R22, 200
    +  clr R23
    +  clr R24
    +  clr R25
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call HandleErrorAddrFrameInd
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall HandleErrorAddrFrameInd
    +{$endif CPUAVR_HAS_JMP_CALL}
    +
    +.LNonZero:
    +
    +  push R17
    +  push R16
    +  push R15
    +  push R14
    +  push R13
    +  push R12
    +  push R11
    +  push R10
    +  push R9
    +  push R8
    +  push R7
    +  push R6
    +  push R5
    +  push R4
    +  push R3
    +  push R2
    +
    +  ldi R27, 1      // needed below for OR instruction
    +
    +start:            // Start of division...
    +  clr R9          // clear remainder
    +  clr R8
    +  clr R7
    +  clr R6
    +  clr R5
    +  clr R4
    +  clr R3
    +  clr R2
    +  ldi R26, 64     // iterate over 64 bits
    +
    +div1:
    +  lsl R10         // shift left A_L
    +  rol R11
    +  rol R12
    +  rol R13
    +  rol R14
    +  rol R15
    +  rol R16
    +  rol R17
    +
    +  rol R2          // shift left P with carry from A shift
    +  rol R3
    +  rol R4
    +  rol R5
    +  rol R6
    +  rol R7
    +  rol R8
    +  rol R9
    +
    +  sub R2, R18     // Subtract B from P, P <= P - B
    +  sbc R3, R19
    +  sbc R4, R20
    +  sbc R5, R21
    +  sbc R6, R22
    +  sbc R7, R23
    +  sbc R8, R24
    +  sbc R9, R25
    +
    +  brlo div2
    +  or R10, R27     // Set A[0] = 1
    +  rjmp div3
    +div2:             // negative branch, A[0] = 0 (default after shift), restore P
    +
    +  add R2, R18     // restore old value of P
    +  adc R3, R19
    +  adc R4, R20
    +  adc R5, R21
    +  adc R6, R22
    +  adc R7, R23
    +  adc R8, R24
    +  adc R9, R25
    +
    +div3:
    +  dec R26
    +  breq finish
    +  rjmp div1
    +
    +finish:
    +  mov R25, R17    // Move answer from R17..10 to R25..18
    +  mov R24, R16
    +  mov R23, R15
    +  mov R22, R14
    +  mov R21, R13
    +  mov R20, R12
    +  mov R19, R11
    +  mov R18, R10
    +
    +  pop R2
    +  pop R3
    +  pop R4
    +  pop R5
    +  pop R6
    +  pop R7
    +  pop R8
    +  pop R9
    +  pop R10
    +  pop R11
    +  pop R12
    +  pop R13
    +  pop R14
    +  pop R15
    +  pop R16
    +  pop R17
    +end;
    +function fpc_div_qword(n,z : qword): qword; external name 'FPC_DIV_QWORD';
    +
    +{$define FPC_SYSTEM_HAS_MOD_QWORD}
    +function fpc_mod_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_MOD_QWORD']; compilerproc;
    +label
    +  start, div1, div2, div3, finish;
    +asm
    +// Symbol  Name        Register(s)
    +// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
    +// i	   counter     R26
    +//         1           R27
    +
    +  cp R25, R1
    +  cpc R24, R1
    +  cpc R23, R1
    +  cpc R22, R1
    +  cpc R21, R1
    +  cpc R20, R1
    +  cpc R19, R1
    +  cpc R18, R1
    +
    +  brne .LNonZero
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_pc_addr
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_pc_addr
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R20, R24
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_frame
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_frame
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R18, R24
    +  ldi R22, 200
    +  clr R23
    +  clr R24
    +  clr R25
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call HandleErrorAddrFrameInd
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall HandleErrorAddrFrameInd
    +{$endif CPUAVR_HAS_JMP_CALL}
    +
    +.LNonZero:
    +
    +  push R17
    +  push R16
    +  push R15
    +  push R14
    +  push R13
    +  push R12
    +  push R11
    +  push R10
    +  push R9
    +  push R8
    +  push R7
    +  push R6
    +  push R5
    +  push R4
    +  push R3
    +  push R2
    +
    +  ldi R27, 1
    +start:            // Start of division...
    +  clr R9          // clear remainder
    +  clr R8
    +  clr R7
    +  clr R6
    +  clr R5
    +  clr R4
    +  clr R3
    +  clr R2
    +  ldi R26, 64     // iterate over 64 bits
    +
    +div1:
    +  lsl R10         // shift left A_L
    +  rol R11
    +  rol R12
    +  rol R13
    +  rol R14
    +  rol R15
    +  rol R16
    +  rol R17
    +
    +  rol R2          // shift left P with carry from A shift
    +  rol R3
    +  rol R4
    +  rol R5
    +  rol R6
    +  rol R7
    +  rol R8
    +  rol R9
    +
    +  sub R2, R18     // Subtract B from P, P <= P - B
    +  sbc R3, R19
    +  sbc R4, R20
    +  sbc R5, R21
    +  sbc R6, R22
    +  sbc R7, R23
    +  sbc R8, R24
    +  sbc R9, R25
    +
    +  brlo div2
    +  or R10, R27     // Set A[0] = 1
    +  rjmp div3
    +div2:             // negative branch, A[0] = 0 (default after shift), restore P
    +
    +  add R2, R18     // restore old value of P
    +  adc R3, R19
    +  adc R4, R20
    +  adc R5, R21
    +  adc R6, R22
    +  adc R7, R23
    +  adc R8, R24
    +  adc R9, R25
    +
    +div3:
    +  dec R26
    +  breq finish
    +  rjmp div1
    +
    +finish:
    +  mov R25, R9     // Move answer from R9..2 to R25..18
    +  mov R24, R8
    +  mov R23, R7
    +  mov R22, R6
    +  mov R21, R5
    +  mov R20, R4
    +  mov R19, R3
    +  mov R18, R2
    +
    +  pop R2
    +  pop R3
    +  pop R4
    +  pop R5
    +  pop R6
    +  pop R7
    +  pop R8
    +  pop R9
    +  pop R10
    +  pop R11
    +  pop R12
    +  pop R13
    +  pop R14
    +  pop R15
    +  pop R16
    +  pop R17
    +end;
    +function fpc_mod_qword(n,z : qword): qword; external name 'FPC_MOD_QWORD';
    +
    +
    +{$define FPC_SYSTEM_HAS_DIV_INT64}
    +function fpc_div_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_DIV_INT64']; compilerproc;
    +label
    +  pos1, pos2, fin;
    +asm
    +// Convert n, z to unsigned int, then call div_qword,
    +// Restore sign if high bits of n xor z is negative
    +// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +//         neg_result  R30
    +//         one         R31
    +
    +  mov R30, R17    // store hi8(z)
    +  eor R30, R25  // hi8(z) XOR hi8(n), answer must be negative if MSB set
    +
    +  // convert n to absolute
    +  ldi R31, 1      // 1 in R31 used later
    +  sub R25, r1     // subtract 0, just to check sign flag
    +  brpl pos1
    +  com R25
    +  com R24
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +  add R18, R31    // add 1
    +  adc R19, R1     // add carry bit
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  pos1:
    +
    +  sub R17, R1
    +  brpl pos2
    +  com R17
    +  com R16
    +  com R15
    +  com R14
    +  com R13
    +  com R12
    +  com R11
    +  com R10
    +  add R10, R31
    +  adc R11, R1
    +  adc R12, R1
    +  adc R13, R1
    +  adc R14, R1
    +  adc R15, R1
    +  adc R16, R1
    +  adc R17, R1
    +  pos2:
    +
    +  rcall fpc_div_qword
    +
    +  sbrs R30, 7     // skip if bit 7 is cleared (result should be positive)
    +  rjmp fin
    +  com R25         // result from FPC_DIV_WORD in R25 ... R22
    +  com R24
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +
    +  ldi R31, 1
    +  add R18, R31    // add 1
    +  adc R19, R1     // add carry bit
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  fin:
    +end;
    +
    +{$define FPC_SYSTEM_HAS_MOD_INT64}
    +function fpc_mod_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_MOD_INT64']; compilerproc;
    +label
    +  pos1, pos2, fin;
    +asm
    +// Convert n, z to unsigned int, then call mod_qword,
    +// Restore sign if high bits of n xor z is negative
    +// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +//         neg_result  R30
    +//         one         R31
    +
    +  mov R30, R17  // store hi8(z)
    +
    +  // convert n to absolute
    +  ldi R31, 1
    +  sub R25, r1     // subtract 0, just to check sign flag
    +  brpl pos1
    +  com R25
    +  com R24
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +  add R18, R31    // add 1
    +  adc R19, R1     // add carry bit
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  pos1:
    +
    +  sub R17, R1
    +  brpl pos2
    +  com R17
    +  com R16
    +  com R15
    +  com R14
    +  com R13
    +  com R12
    +  com R11
    +  com R10
    +  add R10, R31
    +  adc R11, R1
    +  adc R12, R1
    +  adc R13, R1
    +  adc R14, R1
    +  adc R15, R1
    +  adc R16, R1
    +  adc R17, R1
    +  pos2:
    +
    +  rcall fpc_mod_qword
    +
    +  sbrs R30, 7     // Not finished if sign bit is set
    +  rjmp fin
    +  com R25         // Convert to 2's complement
    +  com R24         // Complement all bits...
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +  ldi R31, 1
    +  add R18, R31    // ...and add 1 to answer
    +  adc R19, R1
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  fin:
    +end;
    
    int64.patch (12,023 bytes)
  • int64p.inc.patch (12,023 bytes)
    Index: rtl/avr/int64p.inc
    ===================================================================
    --- rtl/avr/int64p.inc	(revision 42245)
    +++ rtl/avr/int64p.inc	(working copy)
    @@ -12,3 +12,577 @@
         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     
      **********************************************************************}
    +
    +{$define FPC_SYSTEM_HAS_SHR_QWORD}
    +// Simplistic version with checking if whole bytes can be shifted
    +// Doesn't change bitshift portion even if possible because of byteshift
    +// Shorter code but not shortest execution time version
    +function fpc_shr_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
    +  [public, alias: 'FPC_SHR_QWORD']; compilerproc;
    +label
    +  byteshift, bitshift, finish;
    +asm
    +// value passed in R25...R18
    +// shift passed in R16
    +// return value in R25...R18
    +
    +  push R16
    +
    +  andi R16, 63    // mask 64 bit relevant value per generic routine
    +byteshift:
    +  breq finish     // shift = 0, finished
    +  cpi R16, 8      // Check if shift is at least a byte
    +  brlo bitshift
    +  mov R18, R19    // if so, then shift all bytes right by 1 position
    +  mov R19, R20
    +  mov R20, R21
    +  mov R21, R22
    +  mov R22, R23
    +  mov R23, R24
    +  mov R24, R25
    +  clr R25         // and clear the high byte
    +  subi R16, 8     // subtract 8 bits from shift
    +  rjmp byteshift  // check if another byte can be shifted
    +
    +bitshift:         // shift all 8 bytes right by 1 bit
    +  lsr R25
    +  ror R24
    +  ror R23
    +  ror R22
    +  ror R21
    +  ror R20
    +  ror R19
    +  ror R18
    +  dec R16
    +  brne bitshift   // until R16 = 0
    +
    +finish:
    +  pop R16
    +end;
    +function fpc_shr_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHR_QWORD';
    +
    +{$define FPC_SYSTEM_HAS_SHL_QWORD}
    +function fpc_shl_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
    +[public, alias: 'FPC_SHL_QWORD']; compilerproc;
    +label
    +  byteshift, bitshift, finish;
    +asm
    +// value passed in R25...R18
    +// shift passed in R16
    +// return value in R25...R18
    +  push R16
    +
    +  andi R16, 63    // mask 64 bit relevant value per generic routine
    +byteshift:
    +  breq finish     // shift = 0, finished
    +  cpi R16, 8      // Check if shift is at least a byte
    +  brlo bitshift
    +  mov R25, R24    // if so, then shift all bytes left by 1 position
    +  mov R24, R23
    +  mov R23, R22
    +  mov R22, R21
    +  mov R21, R20
    +  mov R20, R19
    +  mov R19, R18
    +  clr R18         // and clear the high byte
    +  subi R16, 8     // subtract 8 bits from shift
    +  rjmp byteshift  // check if another byte can be shifted
    +
    +bitshift:         // shift all 8 bytes left by 1 bit
    +  lsl R18
    +  rol R19
    +  rol R20
    +  rol R21
    +  rol R22
    +  rol R23
    +  rol R24
    +  rol R25
    +
    +  dec R16
    +  brne bitshift   // until R16 = 0
    +
    +finish:
    +  pop R16
    +end;
    +
    +function fpc_shl_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHL_QWORD';
    +
    +{$define FPC_SYSTEM_HAS_SHL_INT64}
    +function fpc_shl_int64(value: int64; shift: ALUUInt): int64;
    +  [public, alias: 'FPC_SHL_INT64']; compilerproc; inline;
    +begin
    +  Result := fpc_shl_qword(qword(value), shift);
    +end;
    +
    +{$define FPC_SYSTEM_HAS_SHR_INT64}
    +// shr of signed int is same as shr of unsigned int (logical shift right)
    +function fpc_shr_int64(value: int64; shift: ALUUInt): int64; [public, alias: 'FPC_SHR_INT64']; compilerproc;
    +begin
    +  Result := fpc_shr_qword(qword(value), shift);
    +end;
    +
    +{$define FPC_SYSTEM_HAS_DIV_QWORD}
    +function fpc_div_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_DIV_QWORD']; compilerproc;
    +label
    +  start, div1, div2, div3, finish;
    +asm
    +// Symbol  Name        Register(s)
    +// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
    +// i       counter     R26
    +//         1           R27
    +
    +  cp R25, R1
    +  cpc R24, R1
    +  cpc R23, R1
    +  cpc R22, R1
    +  cpc R21, R1
    +  cpc R20, R1
    +  cpc R19, R1
    +  cpc R18, R1
    +
    +  brne .LNonZero
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_pc_addr
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_pc_addr
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R20, R24
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_frame
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_frame
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R18, R24
    +  ldi R22, 200
    +  clr R23
    +  clr R24
    +  clr R25
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call HandleErrorAddrFrameInd
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall HandleErrorAddrFrameInd
    +{$endif CPUAVR_HAS_JMP_CALL}
    +
    +.LNonZero:
    +
    +  push R17
    +  push R16
    +  push R15
    +  push R14
    +  push R13
    +  push R12
    +  push R11
    +  push R10
    +  push R9
    +  push R8
    +  push R7
    +  push R6
    +  push R5
    +  push R4
    +  push R3
    +  push R2
    +
    +  ldi R27, 1      // needed below for OR instruction
    +
    +start:            // Start of division...
    +  clr R9          // clear remainder
    +  clr R8
    +  clr R7
    +  clr R6
    +  clr R5
    +  clr R4
    +  clr R3
    +  clr R2
    +  ldi R26, 64     // iterate over 64 bits
    +
    +div1:
    +  lsl R10         // shift left A_L
    +  rol R11
    +  rol R12
    +  rol R13
    +  rol R14
    +  rol R15
    +  rol R16
    +  rol R17
    +
    +  rol R2          // shift left P with carry from A shift
    +  rol R3
    +  rol R4
    +  rol R5
    +  rol R6
    +  rol R7
    +  rol R8
    +  rol R9
    +
    +  sub R2, R18     // Subtract B from P, P <= P - B
    +  sbc R3, R19
    +  sbc R4, R20
    +  sbc R5, R21
    +  sbc R6, R22
    +  sbc R7, R23
    +  sbc R8, R24
    +  sbc R9, R25
    +
    +  brlo div2
    +  or R10, R27     // Set A[0] = 1
    +  rjmp div3
    +div2:             // negative branch, A[0] = 0 (default after shift), restore P
    +
    +  add R2, R18     // restore old value of P
    +  adc R3, R19
    +  adc R4, R20
    +  adc R5, R21
    +  adc R6, R22
    +  adc R7, R23
    +  adc R8, R24
    +  adc R9, R25
    +
    +div3:
    +  dec R26
    +  breq finish
    +  rjmp div1
    +
    +finish:
    +  mov R25, R17    // Move answer from R17..10 to R25..18
    +  mov R24, R16
    +  mov R23, R15
    +  mov R22, R14
    +  mov R21, R13
    +  mov R20, R12
    +  mov R19, R11
    +  mov R18, R10
    +
    +  pop R2
    +  pop R3
    +  pop R4
    +  pop R5
    +  pop R6
    +  pop R7
    +  pop R8
    +  pop R9
    +  pop R10
    +  pop R11
    +  pop R12
    +  pop R13
    +  pop R14
    +  pop R15
    +  pop R16
    +  pop R17
    +end;
    +function fpc_div_qword(n,z : qword): qword; external name 'FPC_DIV_QWORD';
    +
    +{$define FPC_SYSTEM_HAS_MOD_QWORD}
    +function fpc_mod_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_MOD_QWORD']; compilerproc;
    +label
    +  start, div1, div2, div3, finish;
    +asm
    +// Symbol  Name        Register(s)
    +// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
    +// i	   counter     R26
    +//         1           R27
    +
    +  cp R25, R1
    +  cpc R24, R1
    +  cpc R23, R1
    +  cpc R22, R1
    +  cpc R21, R1
    +  cpc R20, R1
    +  cpc R19, R1
    +  cpc R18, R1
    +
    +  brne .LNonZero
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_pc_addr
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_pc_addr
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R20, R24
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call get_frame
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall get_frame
    +{$endif CPUAVR_HAS_JMP_CALL}
    +  movw R18, R24
    +  ldi R22, 200
    +  clr R23
    +  clr R24
    +  clr R25
    +{$ifdef CPUAVR_HAS_JMP_CALL}
    +  call HandleErrorAddrFrameInd
    +{$else  CPUAVR_HAS_JMP_CALL}
    +  rcall HandleErrorAddrFrameInd
    +{$endif CPUAVR_HAS_JMP_CALL}
    +
    +.LNonZero:
    +
    +  push R17
    +  push R16
    +  push R15
    +  push R14
    +  push R13
    +  push R12
    +  push R11
    +  push R10
    +  push R9
    +  push R8
    +  push R7
    +  push R6
    +  push R5
    +  push R4
    +  push R3
    +  push R2
    +
    +  ldi R27, 1
    +start:            // Start of division...
    +  clr R9          // clear remainder
    +  clr R8
    +  clr R7
    +  clr R6
    +  clr R5
    +  clr R4
    +  clr R3
    +  clr R2
    +  ldi R26, 64     // iterate over 64 bits
    +
    +div1:
    +  lsl R10         // shift left A_L
    +  rol R11
    +  rol R12
    +  rol R13
    +  rol R14
    +  rol R15
    +  rol R16
    +  rol R17
    +
    +  rol R2          // shift left P with carry from A shift
    +  rol R3
    +  rol R4
    +  rol R5
    +  rol R6
    +  rol R7
    +  rol R8
    +  rol R9
    +
    +  sub R2, R18     // Subtract B from P, P <= P - B
    +  sbc R3, R19
    +  sbc R4, R20
    +  sbc R5, R21
    +  sbc R6, R22
    +  sbc R7, R23
    +  sbc R8, R24
    +  sbc R9, R25
    +
    +  brlo div2
    +  or R10, R27     // Set A[0] = 1
    +  rjmp div3
    +div2:             // negative branch, A[0] = 0 (default after shift), restore P
    +
    +  add R2, R18     // restore old value of P
    +  adc R3, R19
    +  adc R4, R20
    +  adc R5, R21
    +  adc R6, R22
    +  adc R7, R23
    +  adc R8, R24
    +  adc R9, R25
    +
    +div3:
    +  dec R26
    +  breq finish
    +  rjmp div1
    +
    +finish:
    +  mov R25, R9     // Move answer from R9..2 to R25..18
    +  mov R24, R8
    +  mov R23, R7
    +  mov R22, R6
    +  mov R21, R5
    +  mov R20, R4
    +  mov R19, R3
    +  mov R18, R2
    +
    +  pop R2
    +  pop R3
    +  pop R4
    +  pop R5
    +  pop R6
    +  pop R7
    +  pop R8
    +  pop R9
    +  pop R10
    +  pop R11
    +  pop R12
    +  pop R13
    +  pop R14
    +  pop R15
    +  pop R16
    +  pop R17
    +end;
    +function fpc_mod_qword(n,z : qword): qword; external name 'FPC_MOD_QWORD';
    +
    +
    +{$define FPC_SYSTEM_HAS_DIV_INT64}
    +function fpc_div_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_DIV_INT64']; compilerproc;
    +label
    +  pos1, pos2, fin;
    +asm
    +// Convert n, z to unsigned int, then call div_qword,
    +// Restore sign if high bits of n xor z is negative
    +// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +//         neg_result  R30
    +//         one         R31
    +
    +  mov R30, R17    // store hi8(z)
    +  eor R30, R25  // hi8(z) XOR hi8(n), answer must be negative if MSB set
    +
    +  // convert n to absolute
    +  ldi R31, 1      // 1 in R31 used later
    +  sub R25, r1     // subtract 0, just to check sign flag
    +  brpl pos1
    +  com R25
    +  com R24
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +  add R18, R31    // add 1
    +  adc R19, R1     // add carry bit
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  pos1:
    +
    +  sub R17, R1
    +  brpl pos2
    +  com R17
    +  com R16
    +  com R15
    +  com R14
    +  com R13
    +  com R12
    +  com R11
    +  com R10
    +  add R10, R31
    +  adc R11, R1
    +  adc R12, R1
    +  adc R13, R1
    +  adc R14, R1
    +  adc R15, R1
    +  adc R16, R1
    +  adc R17, R1
    +  pos2:
    +
    +  rcall fpc_div_qword
    +
    +  sbrs R30, 7     // skip if bit 7 is cleared (result should be positive)
    +  rjmp fin
    +  com R25         // result from FPC_DIV_WORD in R25 ... R22
    +  com R24
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +
    +  ldi R31, 1
    +  add R18, R31    // add 1
    +  adc R19, R1     // add carry bit
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  fin:
    +end;
    +
    +{$define FPC_SYSTEM_HAS_MOD_INT64}
    +function fpc_mod_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_MOD_INT64']; compilerproc;
    +label
    +  pos1, pos2, fin;
    +asm
    +// Convert n, z to unsigned int, then call mod_qword,
    +// Restore sign if high bits of n xor z is negative
    +// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
    +// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
    +//         neg_result  R30
    +//         one         R31
    +
    +  mov R30, R17  // store hi8(z)
    +
    +  // convert n to absolute
    +  ldi R31, 1
    +  sub R25, r1     // subtract 0, just to check sign flag
    +  brpl pos1
    +  com R25
    +  com R24
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +  add R18, R31    // add 1
    +  adc R19, R1     // add carry bit
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  pos1:
    +
    +  sub R17, R1
    +  brpl pos2
    +  com R17
    +  com R16
    +  com R15
    +  com R14
    +  com R13
    +  com R12
    +  com R11
    +  com R10
    +  add R10, R31
    +  adc R11, R1
    +  adc R12, R1
    +  adc R13, R1
    +  adc R14, R1
    +  adc R15, R1
    +  adc R16, R1
    +  adc R17, R1
    +  pos2:
    +
    +  rcall fpc_mod_qword
    +
    +  sbrs R30, 7     // Not finished if sign bit is set
    +  rjmp fin
    +  com R25         // Convert to 2's complement
    +  com R24         // Complement all bits...
    +  com R23
    +  com R22
    +  com R21
    +  com R20
    +  com R19
    +  com R18
    +  ldi R31, 1
    +  add R18, R31    // ...and add 1 to answer
    +  adc R19, R1
    +  adc R20, R1
    +  adc R21, R1
    +  adc R22, R1
    +  adc R23, R1
    +  adc R24, R1
    +  adc R25, R1
    +  fin:
    +end;
    
    int64p.inc.patch (12,023 bytes)

Activities

Christo Crause

2019-06-08 12:59

reporter  

int64.patch (12,023 bytes)
Index: rtl/avr/int64p.inc
===================================================================
--- rtl/avr/int64p.inc	(revision 42190)
+++ rtl/avr/int64p.inc	(working copy)
@@ -12,3 +12,577 @@
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
  **********************************************************************}
+
+{$define FPC_SYSTEM_HAS_SHR_QWORD}
+// Simplistic version with checking if whole bytes can be shifted
+// Doesn't change bitshift portion even if possible because of byteshift
+// Shorter code but not shortest execution time version
+function fpc_shr_qword(value: qword; shift: sizeint): qword; assembler; nostackframe;
+  [public, alias: 'FPC_SHR_QWORD']; compilerproc;
+label
+  byteshift, bitshift, finish;
+asm
+// value passed in R25...R18
+// shift passed in R16
+// return value in R25...R18
+
+  push R16
+
+  andi R16, 63    // mask 64 bit relevant value per generic routine
+byteshift:
+  breq finish     // shift = 0, finished
+  cpi R16, 8      // Check if shift is at least a byte
+  brlo bitshift
+  mov R18, R19    // if so, then shift all bytes right by 1 position
+  mov R19, R20
+  mov R20, R21
+  mov R21, R22
+  mov R22, R23
+  mov R23, R24
+  mov R24, R25
+  clr R25         // and clear the high byte
+  subi R16, 8     // subtract 8 bits from shift
+  rjmp byteshift  // check if another byte can be shifted
+
+bitshift:         // shift all 8 bytes right by 1 bit
+  lsr R25
+  ror R24
+  ror R23
+  ror R22
+  ror R21
+  ror R20
+  ror R19
+  ror R18
+  dec R16
+  brne bitshift   // until R16 = 0
+
+finish:
+  pop R16
+end;
+function fpc_shr_qword(value: qword; shift: sizeint): qword; external name 'FPC_SHR_QWORD';
+
+{$define FPC_SYSTEM_HAS_SHL_QWORD}
+function fpc_shl_qword(value: qword; shift: sizeint): qword; assembler; nostackframe;
+[public, alias: 'FPC_SHL_QWORD']; compilerproc;
+label
+  byteshift, bitshift, finish;
+asm
+// value passed in R25...R18
+// shift passed in R16
+// return value in R25...R18
+  push R16
+
+  andi R16, 63    // mask 64 bit relevant value per generic routine
+byteshift:
+  breq finish     // shift = 0, finished
+  cpi R16, 8      // Check if shift is at least a byte
+  brlo bitshift
+  mov R25, R24    // if so, then shift all bytes left by 1 position
+  mov R24, R23
+  mov R23, R22
+  mov R22, R21
+  mov R21, R20
+  mov R20, R19
+  mov R19, R18
+  clr R18         // and clear the high byte
+  subi R16, 8     // subtract 8 bits from shift
+  rjmp byteshift  // check if another byte can be shifted
+
+bitshift:         // shift all 8 bytes left by 1 bit
+  lsl R18
+  rol R19
+  rol R20
+  rol R21
+  rol R22
+  rol R23
+  rol R24
+  rol R25
+
+  dec R16
+  brne bitshift   // until R16 = 0
+
+finish:
+  pop R16
+end;
+
+function fpc_shl_qword(value: qword; shift: sizeint): qword; external name 'FPC_SHL_QWORD';
+
+{$define FPC_SYSTEM_HAS_SHL_INT64}
+function fpc_shl_int64(value: int64; shift: sizeint): int64;
+  [public, alias: 'FPC_SHL_INT64']; compilerproc; inline;
+begin
+  Result := fpc_shl_qword(qword(value), shift);
+end;
+
+{$define FPC_SYSTEM_HAS_SHR_INT64}
+// shr of signed int is same as shr of unsigned int (logical shift right)
+function fpc_shr_int64(value: int64; shift: sizeint): int64; [public, alias: 'FPC_SHR_INT64']; compilerproc;
+begin
+  Result := fpc_shr_qword(qword(value), shift);
+end;
+
+{$define FPC_SYSTEM_HAS_DIV_QWORD}
+function fpc_div_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_DIV_QWORD']; compilerproc;
+label
+  start, div1, div2, div3, finish;
+asm
+// Symbol  Name        Register(s)
+// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
+// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
+// i       counter     R26
+//         1           R27
+
+  cp R25, R1
+  cpc R24, R1
+  cpc R23, R1
+  cpc R22, R1
+  cpc R21, R1
+  cpc R20, R1
+  cpc R19, R1
+  cpc R18, R1
+
+  brne .LNonZero
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_pc_addr
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_pc_addr
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R20, R24
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_frame
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_frame
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R18, R24
+  ldi R22, 200
+  clr R23
+  clr R24
+  clr R25
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call HandleErrorAddrFrameInd
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall HandleErrorAddrFrameInd
+{$endif CPUAVR_HAS_JMP_CALL}
+
+.LNonZero:
+
+  push R17
+  push R16
+  push R15
+  push R14
+  push R13
+  push R12
+  push R11
+  push R10
+  push R9
+  push R8
+  push R7
+  push R6
+  push R5
+  push R4
+  push R3
+  push R2
+
+  ldi R27, 1      // needed below for OR instruction
+
+start:            // Start of division...
+  clr R9          // clear remainder
+  clr R8
+  clr R7
+  clr R6
+  clr R5
+  clr R4
+  clr R3
+  clr R2
+  ldi R26, 64     // iterate over 64 bits
+
+div1:
+  lsl R10         // shift left A_L
+  rol R11
+  rol R12
+  rol R13
+  rol R14
+  rol R15
+  rol R16
+  rol R17
+
+  rol R2          // shift left P with carry from A shift
+  rol R3
+  rol R4
+  rol R5
+  rol R6
+  rol R7
+  rol R8
+  rol R9
+
+  sub R2, R18     // Subtract B from P, P <= P - B
+  sbc R3, R19
+  sbc R4, R20
+  sbc R5, R21
+  sbc R6, R22
+  sbc R7, R23
+  sbc R8, R24
+  sbc R9, R25
+
+  brlo div2
+  or R10, R27     // Set A[0] = 1
+  rjmp div3
+div2:             // negative branch, A[0] = 0 (default after shift), restore P
+
+  add R2, R18     // restore old value of P
+  adc R3, R19
+  adc R4, R20
+  adc R5, R21
+  adc R6, R22
+  adc R7, R23
+  adc R8, R24
+  adc R9, R25
+
+div3:
+  dec R26
+  breq finish
+  rjmp div1
+
+finish:
+  mov R25, R17    // Move answer from R17..10 to R25..18
+  mov R24, R16
+  mov R23, R15
+  mov R22, R14
+  mov R21, R13
+  mov R20, R12
+  mov R19, R11
+  mov R18, R10
+
+  pop R2
+  pop R3
+  pop R4
+  pop R5
+  pop R6
+  pop R7
+  pop R8
+  pop R9
+  pop R10
+  pop R11
+  pop R12
+  pop R13
+  pop R14
+  pop R15
+  pop R16
+  pop R17
+end;
+function fpc_div_qword(n,z : qword): qword; external name 'FPC_DIV_QWORD';
+
+{$define FPC_SYSTEM_HAS_MOD_QWORD}
+function fpc_mod_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_MOD_QWORD']; compilerproc;
+label
+  start, div1, div2, div3, finish;
+asm
+// Symbol  Name        Register(s)
+// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
+// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
+// i	   counter     R26
+//         1           R27
+
+  cp R25, R1
+  cpc R24, R1
+  cpc R23, R1
+  cpc R22, R1
+  cpc R21, R1
+  cpc R20, R1
+  cpc R19, R1
+  cpc R18, R1
+
+  brne .LNonZero
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_pc_addr
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_pc_addr
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R20, R24
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_frame
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_frame
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R18, R24
+  ldi R22, 200
+  clr R23
+  clr R24
+  clr R25
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call HandleErrorAddrFrameInd
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall HandleErrorAddrFrameInd
+{$endif CPUAVR_HAS_JMP_CALL}
+
+.LNonZero:
+
+  push R17
+  push R16
+  push R15
+  push R14
+  push R13
+  push R12
+  push R11
+  push R10
+  push R9
+  push R8
+  push R7
+  push R6
+  push R5
+  push R4
+  push R3
+  push R2
+
+  ldi R27, 1
+start:            // Start of division...
+  clr R9          // clear remainder
+  clr R8
+  clr R7
+  clr R6
+  clr R5
+  clr R4
+  clr R3
+  clr R2
+  ldi R26, 64     // iterate over 64 bits
+
+div1:
+  lsl R10         // shift left A_L
+  rol R11
+  rol R12
+  rol R13
+  rol R14
+  rol R15
+  rol R16
+  rol R17
+
+  rol R2          // shift left P with carry from A shift
+  rol R3
+  rol R4
+  rol R5
+  rol R6
+  rol R7
+  rol R8
+  rol R9
+
+  sub R2, R18     // Subtract B from P, P <= P - B
+  sbc R3, R19
+  sbc R4, R20
+  sbc R5, R21
+  sbc R6, R22
+  sbc R7, R23
+  sbc R8, R24
+  sbc R9, R25
+
+  brlo div2
+  or R10, R27     // Set A[0] = 1
+  rjmp div3
+div2:             // negative branch, A[0] = 0 (default after shift), restore P
+
+  add R2, R18     // restore old value of P
+  adc R3, R19
+  adc R4, R20
+  adc R5, R21
+  adc R6, R22
+  adc R7, R23
+  adc R8, R24
+  adc R9, R25
+
+div3:
+  dec R26
+  breq finish
+  rjmp div1
+
+finish:
+  mov R25, R9     // Move answer from R9..2 to R25..18
+  mov R24, R8
+  mov R23, R7
+  mov R22, R6
+  mov R21, R5
+  mov R20, R4
+  mov R19, R3
+  mov R18, R2
+
+  pop R2
+  pop R3
+  pop R4
+  pop R5
+  pop R6
+  pop R7
+  pop R8
+  pop R9
+  pop R10
+  pop R11
+  pop R12
+  pop R13
+  pop R14
+  pop R15
+  pop R16
+  pop R17
+end;
+function fpc_mod_qword(n,z : qword): qword; external name 'FPC_MOD_QWORD';
+
+
+{$define FPC_SYSTEM_HAS_DIV_INT64}
+function fpc_div_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_DIV_INT64']; compilerproc;
+label
+  pos1, pos2, fin;
+asm
+// Convert n, z to unsigned int, then call div_qword,
+// Restore sign if high bits of n xor z is negative
+// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
+//         neg_result  R30
+//         one         R31
+
+  mov R30, R17    // store hi8(z)
+  eor R30, R25  // hi8(z) XOR hi8(n), answer must be negative if MSB set
+
+  // convert n to absolute
+  ldi R31, 1      // 1 in R31 used later
+  sub R25, r1     // subtract 0, just to check sign flag
+  brpl pos1
+  com R25
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  pos1:
+
+  sub R17, R1
+  brpl pos2
+  com R17
+  com R16
+  com R15
+  com R14
+  com R13
+  com R12
+  com R11
+  com R10
+  add R10, R31
+  adc R11, R1
+  adc R12, R1
+  adc R13, R1
+  adc R14, R1
+  adc R15, R1
+  adc R16, R1
+  adc R17, R1
+  pos2:
+
+  rcall fpc_div_qword
+
+  sbrs R30, 7     // skip if bit 7 is cleared (result should be positive)
+  rjmp fin
+  com R25         // result from FPC_DIV_WORD in R25 ... R22
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+
+  ldi R31, 1
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  fin:
+end;
+
+{$define FPC_SYSTEM_HAS_MOD_INT64}
+function fpc_mod_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_MOD_INT64']; compilerproc;
+label
+  pos1, pos2, fin;
+asm
+// Convert n, z to unsigned int, then call mod_qword,
+// Restore sign if high bits of n xor z is negative
+// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
+//         neg_result  R30
+//         one         R31
+
+  mov R30, R17  // store hi8(z)
+
+  // convert n to absolute
+  ldi R31, 1
+  sub R25, r1     // subtract 0, just to check sign flag
+  brpl pos1
+  com R25
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  pos1:
+
+  sub R17, R1
+  brpl pos2
+  com R17
+  com R16
+  com R15
+  com R14
+  com R13
+  com R12
+  com R11
+  com R10
+  add R10, R31
+  adc R11, R1
+  adc R12, R1
+  adc R13, R1
+  adc R14, R1
+  adc R15, R1
+  adc R16, R1
+  adc R17, R1
+  pos2:
+
+  rcall fpc_mod_qword
+
+  sbrs R30, 7     // Not finished if sign bit is set
+  rjmp fin
+  com R25         // Convert to 2's complement
+  com R24         // Complement all bits...
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  ldi R31, 1
+  add R18, R31    // ...and add 1 to answer
+  adc R19, R1
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  fin:
+end;
int64.patch (12,023 bytes)

Christo Crause

2019-06-18 21:26

reporter   ~0116775

I noticed that the parameter signatures for the shift functions changed shortly after I prepared the patch. The second patch matches the new parameter signatures in trunk.

int64p.inc.patch (12,023 bytes)
Index: rtl/avr/int64p.inc
===================================================================
--- rtl/avr/int64p.inc	(revision 42245)
+++ rtl/avr/int64p.inc	(working copy)
@@ -12,3 +12,577 @@
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
  **********************************************************************}
+
+{$define FPC_SYSTEM_HAS_SHR_QWORD}
+// Simplistic version with checking if whole bytes can be shifted
+// Doesn't change bitshift portion even if possible because of byteshift
+// Shorter code but not shortest execution time version
+function fpc_shr_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
+  [public, alias: 'FPC_SHR_QWORD']; compilerproc;
+label
+  byteshift, bitshift, finish;
+asm
+// value passed in R25...R18
+// shift passed in R16
+// return value in R25...R18
+
+  push R16
+
+  andi R16, 63    // mask 64 bit relevant value per generic routine
+byteshift:
+  breq finish     // shift = 0, finished
+  cpi R16, 8      // Check if shift is at least a byte
+  brlo bitshift
+  mov R18, R19    // if so, then shift all bytes right by 1 position
+  mov R19, R20
+  mov R20, R21
+  mov R21, R22
+  mov R22, R23
+  mov R23, R24
+  mov R24, R25
+  clr R25         // and clear the high byte
+  subi R16, 8     // subtract 8 bits from shift
+  rjmp byteshift  // check if another byte can be shifted
+
+bitshift:         // shift all 8 bytes right by 1 bit
+  lsr R25
+  ror R24
+  ror R23
+  ror R22
+  ror R21
+  ror R20
+  ror R19
+  ror R18
+  dec R16
+  brne bitshift   // until R16 = 0
+
+finish:
+  pop R16
+end;
+function fpc_shr_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHR_QWORD';
+
+{$define FPC_SYSTEM_HAS_SHL_QWORD}
+function fpc_shl_qword(value: qword; shift: ALUUInt): qword; assembler; nostackframe;
+[public, alias: 'FPC_SHL_QWORD']; compilerproc;
+label
+  byteshift, bitshift, finish;
+asm
+// value passed in R25...R18
+// shift passed in R16
+// return value in R25...R18
+  push R16
+
+  andi R16, 63    // mask 64 bit relevant value per generic routine
+byteshift:
+  breq finish     // shift = 0, finished
+  cpi R16, 8      // Check if shift is at least a byte
+  brlo bitshift
+  mov R25, R24    // if so, then shift all bytes left by 1 position
+  mov R24, R23
+  mov R23, R22
+  mov R22, R21
+  mov R21, R20
+  mov R20, R19
+  mov R19, R18
+  clr R18         // and clear the high byte
+  subi R16, 8     // subtract 8 bits from shift
+  rjmp byteshift  // check if another byte can be shifted
+
+bitshift:         // shift all 8 bytes left by 1 bit
+  lsl R18
+  rol R19
+  rol R20
+  rol R21
+  rol R22
+  rol R23
+  rol R24
+  rol R25
+
+  dec R16
+  brne bitshift   // until R16 = 0
+
+finish:
+  pop R16
+end;
+
+function fpc_shl_qword(value: qword; shift: ALUUInt): qword; external name 'FPC_SHL_QWORD';
+
+{$define FPC_SYSTEM_HAS_SHL_INT64}
+function fpc_shl_int64(value: int64; shift: ALUUInt): int64;
+  [public, alias: 'FPC_SHL_INT64']; compilerproc; inline;
+begin
+  Result := fpc_shl_qword(qword(value), shift);
+end;
+
+{$define FPC_SYSTEM_HAS_SHR_INT64}
+// shr of signed int is same as shr of unsigned int (logical shift right)
+function fpc_shr_int64(value: int64; shift: ALUUInt): int64; [public, alias: 'FPC_SHR_INT64']; compilerproc;
+begin
+  Result := fpc_shr_qword(qword(value), shift);
+end;
+
+{$define FPC_SYSTEM_HAS_DIV_QWORD}
+function fpc_div_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_DIV_QWORD']; compilerproc;
+label
+  start, div1, div2, div3, finish;
+asm
+// Symbol  Name        Register(s)
+// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
+// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
+// i       counter     R26
+//         1           R27
+
+  cp R25, R1
+  cpc R24, R1
+  cpc R23, R1
+  cpc R22, R1
+  cpc R21, R1
+  cpc R20, R1
+  cpc R19, R1
+  cpc R18, R1
+
+  brne .LNonZero
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_pc_addr
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_pc_addr
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R20, R24
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_frame
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_frame
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R18, R24
+  ldi R22, 200
+  clr R23
+  clr R24
+  clr R25
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call HandleErrorAddrFrameInd
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall HandleErrorAddrFrameInd
+{$endif CPUAVR_HAS_JMP_CALL}
+
+.LNonZero:
+
+  push R17
+  push R16
+  push R15
+  push R14
+  push R13
+  push R12
+  push R11
+  push R10
+  push R9
+  push R8
+  push R7
+  push R6
+  push R5
+  push R4
+  push R3
+  push R2
+
+  ldi R27, 1      // needed below for OR instruction
+
+start:            // Start of division...
+  clr R9          // clear remainder
+  clr R8
+  clr R7
+  clr R6
+  clr R5
+  clr R4
+  clr R3
+  clr R2
+  ldi R26, 64     // iterate over 64 bits
+
+div1:
+  lsl R10         // shift left A_L
+  rol R11
+  rol R12
+  rol R13
+  rol R14
+  rol R15
+  rol R16
+  rol R17
+
+  rol R2          // shift left P with carry from A shift
+  rol R3
+  rol R4
+  rol R5
+  rol R6
+  rol R7
+  rol R8
+  rol R9
+
+  sub R2, R18     // Subtract B from P, P <= P - B
+  sbc R3, R19
+  sbc R4, R20
+  sbc R5, R21
+  sbc R6, R22
+  sbc R7, R23
+  sbc R8, R24
+  sbc R9, R25
+
+  brlo div2
+  or R10, R27     // Set A[0] = 1
+  rjmp div3
+div2:             // negative branch, A[0] = 0 (default after shift), restore P
+
+  add R2, R18     // restore old value of P
+  adc R3, R19
+  adc R4, R20
+  adc R5, R21
+  adc R6, R22
+  adc R7, R23
+  adc R8, R24
+  adc R9, R25
+
+div3:
+  dec R26
+  breq finish
+  rjmp div1
+
+finish:
+  mov R25, R17    // Move answer from R17..10 to R25..18
+  mov R24, R16
+  mov R23, R15
+  mov R22, R14
+  mov R21, R13
+  mov R20, R12
+  mov R19, R11
+  mov R18, R10
+
+  pop R2
+  pop R3
+  pop R4
+  pop R5
+  pop R6
+  pop R7
+  pop R8
+  pop R9
+  pop R10
+  pop R11
+  pop R12
+  pop R13
+  pop R14
+  pop R15
+  pop R16
+  pop R17
+end;
+function fpc_div_qword(n,z : qword): qword; external name 'FPC_DIV_QWORD';
+
+{$define FPC_SYSTEM_HAS_MOD_QWORD}
+function fpc_mod_qword(n,z : qword): qword; nostackframe; assembler; [public,alias: 'FPC_MOD_QWORD']; compilerproc;
+label
+  start, div1, div2, div3, finish;
+asm
+// Symbol  Name        Register(s)
+// z (A)   dividend    R17, R16, R15, R14, R13, R12, R11, R10
+// n (B)   divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// r (P)   remainder   R9,  R8,  R7,  R6,  R5,  R4,  R3,  R2
+// i	   counter     R26
+//         1           R27
+
+  cp R25, R1
+  cpc R24, R1
+  cpc R23, R1
+  cpc R22, R1
+  cpc R21, R1
+  cpc R20, R1
+  cpc R19, R1
+  cpc R18, R1
+
+  brne .LNonZero
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_pc_addr
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_pc_addr
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R20, R24
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call get_frame
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall get_frame
+{$endif CPUAVR_HAS_JMP_CALL}
+  movw R18, R24
+  ldi R22, 200
+  clr R23
+  clr R24
+  clr R25
+{$ifdef CPUAVR_HAS_JMP_CALL}
+  call HandleErrorAddrFrameInd
+{$else  CPUAVR_HAS_JMP_CALL}
+  rcall HandleErrorAddrFrameInd
+{$endif CPUAVR_HAS_JMP_CALL}
+
+.LNonZero:
+
+  push R17
+  push R16
+  push R15
+  push R14
+  push R13
+  push R12
+  push R11
+  push R10
+  push R9
+  push R8
+  push R7
+  push R6
+  push R5
+  push R4
+  push R3
+  push R2
+
+  ldi R27, 1
+start:            // Start of division...
+  clr R9          // clear remainder
+  clr R8
+  clr R7
+  clr R6
+  clr R5
+  clr R4
+  clr R3
+  clr R2
+  ldi R26, 64     // iterate over 64 bits
+
+div1:
+  lsl R10         // shift left A_L
+  rol R11
+  rol R12
+  rol R13
+  rol R14
+  rol R15
+  rol R16
+  rol R17
+
+  rol R2          // shift left P with carry from A shift
+  rol R3
+  rol R4
+  rol R5
+  rol R6
+  rol R7
+  rol R8
+  rol R9
+
+  sub R2, R18     // Subtract B from P, P <= P - B
+  sbc R3, R19
+  sbc R4, R20
+  sbc R5, R21
+  sbc R6, R22
+  sbc R7, R23
+  sbc R8, R24
+  sbc R9, R25
+
+  brlo div2
+  or R10, R27     // Set A[0] = 1
+  rjmp div3
+div2:             // negative branch, A[0] = 0 (default after shift), restore P
+
+  add R2, R18     // restore old value of P
+  adc R3, R19
+  adc R4, R20
+  adc R5, R21
+  adc R6, R22
+  adc R7, R23
+  adc R8, R24
+  adc R9, R25
+
+div3:
+  dec R26
+  breq finish
+  rjmp div1
+
+finish:
+  mov R25, R9     // Move answer from R9..2 to R25..18
+  mov R24, R8
+  mov R23, R7
+  mov R22, R6
+  mov R21, R5
+  mov R20, R4
+  mov R19, R3
+  mov R18, R2
+
+  pop R2
+  pop R3
+  pop R4
+  pop R5
+  pop R6
+  pop R7
+  pop R8
+  pop R9
+  pop R10
+  pop R11
+  pop R12
+  pop R13
+  pop R14
+  pop R15
+  pop R16
+  pop R17
+end;
+function fpc_mod_qword(n,z : qword): qword; external name 'FPC_MOD_QWORD';
+
+
+{$define FPC_SYSTEM_HAS_DIV_INT64}
+function fpc_div_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_DIV_INT64']; compilerproc;
+label
+  pos1, pos2, fin;
+asm
+// Convert n, z to unsigned int, then call div_qword,
+// Restore sign if high bits of n xor z is negative
+// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
+//         neg_result  R30
+//         one         R31
+
+  mov R30, R17    // store hi8(z)
+  eor R30, R25  // hi8(z) XOR hi8(n), answer must be negative if MSB set
+
+  // convert n to absolute
+  ldi R31, 1      // 1 in R31 used later
+  sub R25, r1     // subtract 0, just to check sign flag
+  brpl pos1
+  com R25
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  pos1:
+
+  sub R17, R1
+  brpl pos2
+  com R17
+  com R16
+  com R15
+  com R14
+  com R13
+  com R12
+  com R11
+  com R10
+  add R10, R31
+  adc R11, R1
+  adc R12, R1
+  adc R13, R1
+  adc R14, R1
+  adc R15, R1
+  adc R16, R1
+  adc R17, R1
+  pos2:
+
+  rcall fpc_div_qword
+
+  sbrs R30, 7     // skip if bit 7 is cleared (result should be positive)
+  rjmp fin
+  com R25         // result from FPC_DIV_WORD in R25 ... R22
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+
+  ldi R31, 1
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  fin:
+end;
+
+{$define FPC_SYSTEM_HAS_MOD_INT64}
+function fpc_mod_int64(n,z : int64) : int64; nostackframe; assembler; [public,alias: 'FPC_MOD_INT64']; compilerproc;
+label
+  pos1, pos2, fin;
+asm
+// Convert n, z to unsigned int, then call mod_qword,
+// Restore sign if high bits of n xor z is negative
+// n       divisor     R25, R24, R23, R22, R21, R20, R19, R18
+// z       dividend    R17, R16, R15, R14, R13, R12, R11, R10
+//         neg_result  R30
+//         one         R31
+
+  mov R30, R17  // store hi8(z)
+
+  // convert n to absolute
+  ldi R31, 1
+  sub R25, r1     // subtract 0, just to check sign flag
+  brpl pos1
+  com R25
+  com R24
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  add R18, R31    // add 1
+  adc R19, R1     // add carry bit
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  pos1:
+
+  sub R17, R1
+  brpl pos2
+  com R17
+  com R16
+  com R15
+  com R14
+  com R13
+  com R12
+  com R11
+  com R10
+  add R10, R31
+  adc R11, R1
+  adc R12, R1
+  adc R13, R1
+  adc R14, R1
+  adc R15, R1
+  adc R16, R1
+  adc R17, R1
+  pos2:
+
+  rcall fpc_mod_qword
+
+  sbrs R30, 7     // Not finished if sign bit is set
+  rjmp fin
+  com R25         // Convert to 2's complement
+  com R24         // Complement all bits...
+  com R23
+  com R22
+  com R21
+  com R20
+  com R19
+  com R18
+  ldi R31, 1
+  add R18, R31    // ...and add 1 to answer
+  adc R19, R1
+  adc R20, R1
+  adc R21, R1
+  adc R22, R1
+  adc R23, R1
+  adc R24, R1
+  adc R25, R1
+  fin:
+end;
int64p.inc.patch (12,023 bytes)

Jeppe Johansen

2019-07-25 17:39

developer   ~0117396

Thanks for the patch

Christo Crause

2019-07-26 21:27

reporter   ~0117424

Thanks Jeppe.

Issue History

Date Modified Username Field Change
2019-06-08 12:59 Christo Crause New Issue
2019-06-08 12:59 Christo Crause File Added: int64.patch
2019-06-08 16:50 Dimitrios Chr. Ioannidis Tag Attached: AVR
2019-06-18 21:26 Christo Crause File Added: int64p.inc.patch
2019-06-18 21:26 Christo Crause Note Added: 0116775
2019-07-25 17:39 Jeppe Johansen Assigned To => Jeppe Johansen
2019-07-25 17:39 Jeppe Johansen Status new => resolved
2019-07-25 17:39 Jeppe Johansen Resolution open => fixed
2019-07-25 17:39 Jeppe Johansen Fixed in Version => 3.3.1
2019-07-25 17:39 Jeppe Johansen Fixed in Revision => 42495
2019-07-25 17:39 Jeppe Johansen FPCTarget => -
2019-07-25 17:39 Jeppe Johansen Note Added: 0117396
2019-07-26 21:27 Christo Crause Status resolved => closed
2019-07-26 21:27 Christo Crause Note Added: 0117424