diff options
Diffstat (limited to 'libmp/Inferno-amd64')
| -rw-r--r-- | libmp/Inferno-amd64/mkfile | 21 | ||||
| -rw-r--r-- | libmp/Inferno-amd64/mpdigdiv.s | 21 | ||||
| -rw-r--r-- | libmp/Inferno-amd64/mpvecadd.s | 54 | ||||
| -rw-r--r-- | libmp/Inferno-amd64/mpvecdigmuladd.s | 53 | ||||
| -rw-r--r-- | libmp/Inferno-amd64/mpvecdigmulsub.s | 54 | ||||
| -rw-r--r-- | libmp/Inferno-amd64/mpvecsub.s | 45 |
6 files changed, 248 insertions, 0 deletions
diff --git a/libmp/Inferno-amd64/mkfile b/libmp/Inferno-amd64/mkfile new file mode 100644 index 00000000..414e98c4 --- /dev/null +++ b/libmp/Inferno-amd64/mkfile @@ -0,0 +1,21 @@ +objtype=amd64 +OBJTYPE=$objtype +<../../mkconfig + +LIB=libmp.a +SFILES=\ + mpvecadd.s\ + mpvecdigmuladd.s\ + mpvecdigmulsub.s\ + mpvecsub.s\ + mpdigdiv.s\ + +HFILES=$ROOT/Inferno/$OBJTYPE/include/u.h $ROOT/include/mp.h ../port/dat.h + +OFILES=${SFILES:%.s=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +<$ROOT/mkfiles/mksyslib-$SHELLTYPE diff --git a/libmp/Inferno-amd64/mpdigdiv.s b/libmp/Inferno-amd64/mpdigdiv.s new file mode 100644 index 00000000..6025d141 --- /dev/null +++ b/libmp/Inferno-amd64/mpdigdiv.s @@ -0,0 +1,21 @@ +TEXT mpdigdiv(SB),$0 + +/* MOVL dividend+0(FP),BX */ + MOVL 0(RARG),AX + MOVL 4(RARG),DX + MOVL divisor+8(FP),BX + MOVQ quotient+16(FP),DI + XORL CX,CX + CMPL DX,BX /* dividend >= 2^32 * divisor */ + JHS _divovfl + CMPL BX,CX /* divisor == 0 */ + JE _divovfl + DIVL BX /* AX = DX:AX/BX */ + MOVL AX,0(DI) + RET + + /* return all 1's */ +_divovfl: + NOTL CX + MOVL CX,0(DI) + RET diff --git a/libmp/Inferno-amd64/mpvecadd.s b/libmp/Inferno-amd64/mpvecadd.s new file mode 100644 index 00000000..326f39da --- /dev/null +++ b/libmp/Inferno-amd64/mpvecadd.s @@ -0,0 +1,54 @@ +/* + * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum) + * + * sum[0:alen] = a[0:alen-1] + b[0:blen-1] + * + * prereq: alen >= blen, sum has room for alen+1 digits + */ +TEXT mpvecadd(SB),$0 + + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX +/* MOVL a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + SUBL CX,DX + MOVQ sum+32(FP),DI + XORL BP,BP /* this also sets carry to 0 */ + + /* skip addition if b is zero */ + TESTL CX,CX + JZ _add1 + + /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */ +_addloop1: + MOVL (SI)(BP*4), AX + ADCL (BX)(BP*4), AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop1 + +_add1: + /* jump if alen > blen */ + INCL DX + MOVL DX,CX + LOOP _addloop2 + + /* sum[alen] = 
carry */ +_addend: + JC _addcarry + MOVL $0,(DI)(BP*4) + RET +_addcarry: + MOVL $1,(DI)(BP*4) + RET + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */ +_addloop2: + MOVL (SI)(BP*4),AX + ADCL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop2 + JMP _addend + diff --git a/libmp/Inferno-amd64/mpvecdigmuladd.s b/libmp/Inferno-amd64/mpvecdigmuladd.s new file mode 100644 index 00000000..6599a42c --- /dev/null +++ b/libmp/Inferno-amd64/mpvecdigmuladd.s @@ -0,0 +1,53 @@ +/* + * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p += b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b+n = SI - can't be BP + * p+n = DI - can't be BP + * i-n = BP + * m = BX + * oldhi = CX + * + */ +TEXT mpvecdigmuladd(SB),$0 + +/* MOVQ b+0(FP),SI */ + MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + MOVL CX,BP + NEGQ BP /* BP = -n */ + SHLL $2,CX + ADDQ CX,SI /* SI = b + n */ + ADDQ CX,DI /* DI = p + n */ + XORL CX,CX +_muladdloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL CX,AX /* lo += oldhi */ + JCC _muladdnocarry1 + INCL DX /* hi += carry */ +_muladdnocarry1: + ADDL AX,(DI)(BP*4) /* p[i] += lo */ + JCC _muladdnocarry2 + INCL DX /* hi += carry */ +_muladdnocarry2: + MOVL DX,CX /* oldhi = hi */ + INCQ BP /* i++ */ + JNZ _muladdloop + XORL AX,AX + ADDL CX,(DI)(BP*4) /* p[n] += oldhi */ + ADCL AX,AX /* return carry out of p[n] */ + RET diff --git a/libmp/Inferno-amd64/mpvecdigmulsub.s b/libmp/Inferno-amd64/mpvecdigmulsub.s new file mode 100644 index 00000000..ea316431 --- /dev/null +++ b/libmp/Inferno-amd64/mpvecdigmulsub.s @@ -0,0 +1,54 @@ +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] -= lo + * oldhi = hi + * + * 
the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b = SI - can't be BP + * p = DI - can't be BP + * i = BP + * n = CX - constrained by LOOP instr + * m = BX + * oldhi = 0(SP) - kept on the stack + * + */ +TEXT mpvecdigmulsub(SB),$0 + +/* MOVL b+0(FP),SI */ + MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + XORL BP,BP + PUSHQ BP +_mulsubloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL 0(SP),AX /* lo += oldhi */ + JCC _mulsubnocarry1 + INCL DX /* hi += carry */ +_mulsubnocarry1: + SUBL AX,(DI)(BP*4) + JCC _mulsubnocarry2 + INCL DX /* hi += carry */ +_mulsubnocarry2: + MOVL DX,0(SP) + INCL BP + LOOP _mulsubloop + POPQ AX + SUBL AX,(DI)(BP*4) + JCC _mulsubnocarry3 + MOVQ $-1,AX + RET +_mulsubnocarry3: + MOVQ $1,AX + RET diff --git a/libmp/Inferno-amd64/mpvecsub.s b/libmp/Inferno-amd64/mpvecsub.s new file mode 100644 index 00000000..9e1b5349 --- /dev/null +++ b/libmp/Inferno-amd64/mpvecsub.s @@ -0,0 +1,45 @@ +/* + * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff) + * + * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1] + * + * prereq: alen >= blen, diff has room for alen digits + */ +TEXT mpvecsub(SB),$0 + +/* MOVQ a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX + MOVQ diff+32(FP),DI + SUBL CX,DX + XORL BP,BP /* this also sets carry to 0 */ + + /* skip subtraction if b is zero */ + TESTL CX,CX + JZ _sub1 + + /* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */ +_subloop1: + MOVL (SI)(BP*4),AX + SBBL (BX)(BP*4),AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop1 + +_sub1: + INCL DX + MOVL DX,CX + LOOP _subloop2 + RET + + /* diff[blen:alen-1] = a[blen:alen-1] - 0 */ +_subloop2: + MOVL (SI)(BP*4),AX + SBBL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop2 + RET + |
