summaryrefslogtreecommitdiff
path: root/libmp/Plan9-power/mpvecdigmuladd.s
diff options
context:
space:
mode:
Diffstat (limited to 'libmp/Plan9-power/mpvecdigmuladd.s')
-rw-r--r--libmp/Plan9-power/mpvecdigmuladd.s56
1 files changed, 56 insertions, 0 deletions
diff --git a/libmp/Plan9-power/mpvecdigmuladd.s b/libmp/Plan9-power/mpvecdigmuladd.s
new file mode 100644
index 00000000..b86f7ec7
--- /dev/null
+++ b/libmp/Plan9-power/mpvecdigmuladd.s
@@ -0,0 +1,56 @@
+/* BDNZ: decrement CTR, then branch if the new CTR value is non-zero */
+#define	BDNZ	BC	16,0,
+/* BDNE: decrement CTR, then branch if CTR is non-zero and CR0 EQ is clear (not used below) */
+#define	BDNE	BC	0,2,
+
+/*
+ *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ *	p += b*m
+ *
+ *	b[0..n-1] is multiplied by the single digit m and added into p.
+ *	p[0..n] is read and written: the last high word is added into p[n].
+ *	Any carry out of p[n] is discarded and no value is returned.
+ *	If n <= 0 the routine returns without touching p.
+ *
+ *	each step looks like:
+ *		hi,lo = m*b[i]
+ *		lo += oldhi
+ *		hi += carry
+ *		p[i] += lo
+ *		hi += carry
+ *		oldhi = hi
+ *	and after the last step:
+ *		p[n] += oldhi
+ *
+ *	the registers are:
+ *		b = R3		- first argument, already in R3 on entry
+ *		n = R4		- copied to CTR, which counts the loop
+ *		m = R5
+ *		p = R6
+ *		hi = R8
+ *		lo = R9
+ *		oldhi = R10
+ *		tmp = R11
+ *	R0 is used as the constant zero throughout.
+ *	R3 and R6 are kept one word behind the current element so that
+ *	the update forms (MOVWU) step them as a side effect.
+ *
+ */
+TEXT	mpvecdigmuladd(SB),$0
+
+	MOVW	n+4(FP),R4
+	MOVW	m+8(FP),R5
+	MOVW	p+12(FP),R6
+
+	/*
+	 * BDNZ decrements CTR before testing it, so a zero or negative
+	 * count would wrap and run the loop far past the end of b and p.
+	 */
+	CMP	R4, $0
+	BLE	_muladdret
+
+	SUB	$4, R3		/* pre decrement for MOVWU's */
+	SUB	$4, R6		/* pre decrement for MOVWU's */
+
+	MOVW	R0, R10		/* oldhi = 0 */
+	MOVW	R0, XER		/* clear the carry bit */
+	MOVW	R4, CTR		/* loop count = n */
+_muladdloop:
+	MOVWU	4(R3),R9		/* lo = b[i]; R3 advances */
+	MOVW	4(R6),R11		/* tmp = p[i]; R6 advances at the store */
+	MULHWU	R9,R5,R8		/* hi = (b[i] * m)>>32 */
+	MULLW	R9,R5,R9		/* lo = b[i] * m */
+	ADDC	R10,R9			/* lo += oldhi */
+	ADDE	R0,R8			/* hi += carry */
+	ADDC	R9,R11			/* tmp += lo */
+	ADDE	R0,R8			/* hi += carry */
+	MOVWU	R11,4(R6)		/* p[i] = tmp; R6 advances */
+	MOVW	R8,R10			/* oldhi = hi */
+	BDNZ	_muladdloop
+
+	MOVW	4(R6),R11		/* tmp = p[n] */
+	ADDC	R10,R11			/* tmp += oldhi */
+	MOVWU	R11,4(R6)		/* p[n] = tmp */
+
+_muladdret:
+	RETURN