summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@linux.intel.com>2012-03-21 00:28:48 (GMT)
committerH. Peter Anvin <hpa@linux.intel.com>2012-03-21 00:28:48 (GMT)
commit6d5989ec0349888d22271b5c69dfe15205a92366 (patch)
tree3fa6e4d40cef38d9372fe44a35897959f6d7e332
parent96cd4cad404258337eee3cc81b43f94d7154a8cd (diff)
downloadpbn-6d5989ec0349888d22271b5c69dfe15205a92366.zip
pbn-6d5989ec0349888d22271b5c69dfe15205a92366.tar.gz
pbn-6d5989ec0349888d22271b5c69dfe15205a92366.tar.bz2
pbn-6d5989ec0349888d22271b5c69dfe15205a92366.tar.xz
pbn_mul: Fix the handling of carry
Fix the handling of carry in pbn_mul; adding in the already accumulated values may cause a carry, too. However, we will never overflow two limbs, because: if a = 2^(n-1) - 1 then a * a + a + a = 2^n - 1
-rw-r--r--pbn_mul.c30
1 files changed, 16 insertions, 14 deletions
diff --git a/pbn_mul.c b/pbn_mul.c
index 6b16bd2..4edc118 100644
--- a/pbn_mul.c
+++ b/pbn_mul.c
@@ -21,11 +21,13 @@
struct pbn *pbn_mul(struct pbn *s1, struct pbn *s2)
{
- size_t bits, len;
+ size_t i, j, imax, jmax;
struct pbn *d;
- int i, j;
+ pbn_limb_t *dp;
+ const pbn_limb_t *s2p;
pbn_limb_t x1;
- pbn_2limb_t c;
+ pbn_2limb_t t;
+ pbn_limb_t c;
if (s1->bits == 0) {
pbn_free(s2);
@@ -35,22 +37,22 @@ struct pbn *pbn_mul(struct pbn *s1, struct pbn *s2)
return s2; /* s2 == 0 */
}
- bits = s1->bits + s2->bits;
- len = (bits+PBN_LIMB_BITS-1)/PBN_LIMB_BITS;
+ imax = (s1->bits+PBN_LIMB_BITS-1) >> PBN_LIMB_SHIFT;
+ jmax = (s2->bits+PBN_LIMB_BITS-1) >> PBN_LIMB_SHIFT;
- d = pbn_new(len);
- d->bits = bits;
+ d = pbn_new(imax+jmax);
- for (i = 0; i < s1->len; i++) {
+ for (i = 0; i < imax; i++) {
x1 = s1->num[i];
c = 0;
- for (j = 0; j < s2->len; j++) {
- c += (pbn_2limb_t)x1 * s2->num[j];
- d->num[i+j] += (pbn_limb_t)c;
- c >>= PBN_LIMB_BITS;
+ s2p = s2->num;
+ dp = &d->num[i];
+ for (j = 0; j < jmax; j++) {
+ t = (pbn_2limb_t)x1 * (*s2p++) + *dp + c;
+ *dp++ = (pbn_limb_t)t;
+ c = t >> PBN_LIMB_BITS;
}
- if (c)
- d->num[i+j] += (pbn_limb_t)c;
+ *dp = c;
}
d->minus = s1->minus ^ s2->minus;