summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@linux.intel.com>2012-03-21 05:05:17 (GMT)
committerH. Peter Anvin <hpa@linux.intel.com>2012-03-21 05:05:17 (GMT)
commit231b6afea568146dd70047a3dca3ae3d6a4842d0 (patch)
treeb2e18adf52b9f316cb988c3057914e684133bf5d
parent6d5989ec0349888d22271b5c69dfe15205a92366 (diff)
downloadpbn-231b6afea568146dd70047a3dca3ae3d6a4842d0.zip
pbn-231b6afea568146dd70047a3dca3ae3d6a4842d0.tar.gz
pbn-231b6afea568146dd70047a3dca3ae3d6a4842d0.tar.bz2
pbn-231b6afea568146dd70047a3dca3ae3d6a4842d0.tar.xz
pbn_div: fix the handling of qhatHEADmaster
qhat can genuinely end up as a 33-bit number under certain circumstances, so we need to properly take it into account.
-rw-r--r--pbn_div.c85
1 files changed, 46 insertions, 39 deletions
diff --git a/pbn_div.c b/pbn_div.c
index 88cb060..b8dd2f3 100644
--- a/pbn_div.c
+++ b/pbn_div.c
@@ -14,17 +14,14 @@
* pbn_div.c
*/
-/* quotient, remainder, numerator, denominator
-
- The sign on the outputs = sign(numerator) XOR sign(denominator)
-
- Returns -1 on error (divide by zero) and frees the inputs.
-
- This is doing it the slow way, which is bitwise long division.
- If this turns out to be a bottleneck, then it should be replaced
- by smarter algorithms, but this algorithm should at least be
- correct, so it should be usable for now. */
-
+/*
+ * quotient, remainder, numerator, denominator
+ *
+ * The sign on the outputs = sign(numerator) XOR sign(denominator)
+ *
+ * Returns -1 on error (divide by zero) and frees the inputs.
+ * Returns 1 if the remainder is nonzero, otherwise 0.
+ */
#include "pbnint.h"
int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
@@ -32,16 +29,19 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
struct pbn *q; /* Quotient accumulator */
pbn_limb_t *un; /* Numerator/remainder array */
const pbn_limb_t *vn; /* Denominator array */
- pbn_limb_t qhat; /* Estimated quotient digit */
+ pbn_2limb_t qhat; /* Estimated quotient digit */
pbn_2limb_t rhat; /* Estimated remainder */
- pbn_2slimb_t p, t, k; /* Multiply-subtract loop intermediates */
+ pbn_2limb_t p; /* Product in multiply-subtract loop */
+ pbn_2slimb_t t, k; /* Multiply-subtract loop temporaries */
+ pbn_limb_t c; /* Add/subtract carry */
int nl, dl; /* Length of n, d in limbs */
int s; /* Normalization shift */
int i, j;
int rv;
- if ((rv = d->bits == 0) || pbn_abscmp(n, d) < 0) {
+ if (!d->bits || pbn_abscmp(n, d) < 0) {
/* We either had division by zero, or |n| < |d| */
+ rv = !d->bits ? -1 : !n->bits ? 0 : 1;
if (rp) {
if (d->minus && n->bits) {
n = pbn_cow(n, n->len);
@@ -56,7 +56,7 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
if (qp)
*qp = pbn_int(0);
- return -rv; /* Division by zero? */
+ return rv; /* Division by zero? */
}
/* Numerator and denominator length in limbs */
@@ -71,7 +71,6 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
if (dl == 1) {
/* Single limb denominator */
- pbn_limb_t c;
const pbn_limb_t dn = d->num[0];
c = 0;
@@ -95,19 +94,20 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
for (j = nl - dl; j >= 0; j--) {
pbn_limb_t nx = un[j+dl];
- pbn_limb_t rhatx;
-
+ pbn_limb_t qhatx, rhatx;
+
/* Compute initial estimate of the quotient limb */
-
- rhat = 0;
+
+ rhat = qhat = 0;
if (nx >= vn[dl-1]) {
- rhat = vn[dl-1];
- nx -= vn[dl-1];
+ qhat = (pbn_2limb_t)1 << PBN_LIMB_BITS;
+ nx -= vn[dl-1];
}
-
- DIVIDE(nx, un[j+dl-1], vn[dl-1], qhat, rhatx);
+
+ DIVIDE(nx, un[j+dl-1], vn[dl-1], qhatx, rhatx);
+ qhat += qhatx;
rhat += rhatx;
-
+
/*
* Note: this loop can only be executed at most twice;
* consider unrolling
@@ -118,39 +118,46 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
qhat--;
rhat += vn[dl-1];
}
-
+
/* Multiply-subtract loop */
k = 0;
for (i = 0; i < dl; i++) {
- p = (pbn_2limb_t)qhat * vn[i];
+ p = qhat * vn[i];
t = un[i+j] - k - (pbn_limb_t)p;
un[i+j] = (pbn_limb_t)t;
k = (p >> PBN_LIMB_BITS) - (t >> PBN_LIMB_BITS);
}
t = un[j+dl] - k;
un[j+dl] = (pbn_limb_t)t;
-
+
/* Correct in case of oversubtraction */
if (unlikely(t < 0)) {
qhat--;
- k = 0;
+ c = 0;
for (i = 0; i < dl; i++) {
- t = k + un[i+j] + vn[i];
+ t = (pbn_2limb_t)un[i+j] + vn[i] + c;
un[i+j] = (pbn_limb_t)t;
- k = t >> PBN_LIMB_BITS;
+ c = t >> PBN_LIMB_BITS;
}
- un[j+dl] += (pbn_limb_t)k;
+ un[j+dl] += c;
}
-
+
q->num[j] = qhat;
}
-
- if (rp)
- *rp = pbn_shr(pbn_adjust_bits(n), s);
- else
+
+ if (rp) {
+ n = pbn_shr(pbn_adjust_bits(n), s);
+ rv = !!n->bits;
+ *rp = n;
+ } else {
+ pbn_limb_t z = un[0];
+ for (i = 1; i < dl; i++)
+ z |= un[i];
+ rv = !!z;
pbn_free(n);
+ }
}
-
+
pbn_free(d);
if (qp)
@@ -158,5 +165,5 @@ int pbn_div(struct pbn **qp, struct pbn **rp, struct pbn *n, struct pbn *d)
else
pbn_free(q);
- return 0;
+ return rv;
}