Padlock Montgomery Multiplier Patch
The Montgomery Multiplier below uses binomial multiplication. It doesn't even double the speed (for SSL connections) compared to the standard OpenSSL implementation.
*** eng_padlock.c-dist 2008-04-26 10:16:46.000000000 -0400
--- eng_padlock.c 2008-04-27 16:53:30.000000000 -0400
***************
*** 64,69 ****
--- 64,70 ----
#include <stdio.h>
+ #include <stdlib.h> /* needed for malloc, free and exit */
#include <string.h>
#include <openssl/opensslconf.h>
***************
*** 74,81 ****
--- 75,92 ----
#ifndef OPENSSL_NO_AES
#include <openssl/aes.h>
#endif
+ #ifndef OPENSSL_NO_RSA
+ #include <openssl/rsa.h>
+ #endif
+ #ifndef OPENSSL_NO_DSA
+ #include <openssl/dsa.h>
+ #endif
+ #ifndef OPENSSL_NO_DH
+ #include <openssl/dh.h>
+ #endif
#include <openssl/rand.h>
#include <openssl/err.h>
+ #include <byteswap.h>
#ifndef OPENSSL_NO_HW
#ifndef OPENSSL_NO_HW_PADLOCK
***************
*** 142,147 ****
--- 153,172 ----
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
#endif
+ /*PMM Stuff */
+ #ifndef OPENSSL_NO_RSA
+ static RSA_METHOD padlock_rsa;
+ static int reg_rsa(ENGINE *e);
+ #endif
+ #ifndef OPENSSL_NO_DSA
+ static DSA_METHOD padlock_dsa;
+ static int reg_dsa(ENGINE *e);
+ #endif
+ #ifndef OPENSSL_NO_DH
+ static DH_METHOD padlock_dh;
+ static int reg_dh(ENGINE *e);
+ #endif
+
/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];
***************
*** 149,154 ****
--- 174,180 ----
/* Available features */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
+ static int padlock_use_pmm = 0; /* Montgomery Multiplier */
#ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
#endif
***************
*** 162,176 ****
/* Check available features */
padlock_available();
! #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */
padlock_use_rng=0;
#endif
/* Generate a nice engine name with available features */
BIO_snprintf(padlock_name, sizeof(padlock_name),
! "VIA PadLock (%s, %s)",
padlock_use_rng ? "RNG" : "no-RNG",
! padlock_use_ace ? "ACE" : "no-ACE");
/* Register everything or return with an error */
if (!ENGINE_set_id(e, padlock_id) ||
--- 188,203 ----
/* Check available features */
padlock_available();
! #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */
padlock_use_rng=0;
#endif
/* Generate a nice engine name with available features */
BIO_snprintf(padlock_name, sizeof(padlock_name),
! "VIA PadLock (%s, %s, %s)",
padlock_use_rng ? "RNG" : "no-RNG",
! padlock_use_ace ? "ACE" : "no-ACE",
! padlock_use_pmm ? "PMM" : "no-PMM");
/* Register everything or return with an error */
if (!ENGINE_set_id(e, padlock_id) ||
***************
*** 180,186 ****
#ifndef OPENSSL_NO_AES
(padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
! (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
return 0;
}
--- 207,223 ----
#ifndef OPENSSL_NO_AES
(padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
#endif
! (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand)) ||
! #ifndef OPENSSL_NO_RSA
! (padlock_use_pmm && !reg_rsa (e)) ||
! #endif
! #ifndef OPENSSL_NO_DSA
! (padlock_use_pmm && !reg_dsa (e)) ||
! #endif
! #ifndef OPENSSL_NO_DH
! (padlock_use_pmm && !reg_dh (e))
! #endif
! ){
return 0;
}
***************
*** 210,216 ****
static int
padlock_init(ENGINE *e)
{
! return (padlock_use_rng || padlock_use_ace);
}
/* This stuff is needed if this ENGINE is being compiled into a self-contained
--- 247,253 ----
static int
padlock_init(ENGINE *e)
{
! return (padlock_use_rng || padlock_use_ace || padlock_use_pmm);
}
/* This stuff is needed if this ENGINE is being compiled into a self-contained
***************
*** 364,371 ****
/* Fill up some flags */
padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
! return padlock_use_ace + padlock_use_rng;
}
#ifndef OPENSSL_NO_AES
--- 401,409 ----
/* Fill up some flags */
padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
+ padlock_use_pmm = ((edx & (0x3<<8)) == (0x3<<8));
! return padlock_use_ace + padlock_use_rng + padlock_use_pmm;
}
#ifndef OPENSSL_NO_AES
***************
*** 589,595 ****
jnc skip_r
mov padlock_use_rng,1
inc eax
! skip_r:
noluck:
}
}
--- 627,639 ----
jnc skip_r
mov padlock_use_rng,1
inc eax
! skip_r:bt edx,8
! jnc skip_p
! bt edx,9
! jnc skip_p
! mov padlock_use_pmm,1
! inc eax
! skip_p:
noluck:
}
}
***************
*** 1210,1215 ****
--- 1254,1887 ----
padlock_rand_status, /* rand status */
};
+ /* ===== Montgomery Multiplier ===== */
+
+ typedef unsigned long pmm32u; /* element type of the operand pointers and scratch buffer handed to the MONTMUL instruction in PMM_mont_mul() */
+ typedef long word; /* signed integer used for pointer-alignment arithmetic and for word counts */
+ typedef char byte; /* NOTE(review): not referenced anywhere in the visible part of this patch */
+
+ // #define ALIGN16(x) (uint*)(( ((uint)x) + 15 )&(~15UL))
+ /* the MONTMUL call still from the VIA programming guide */
+ /* #define MONTMUL(TEMP, DUMMY, CONTEXT) asm __volatile__ (".byte 0xF3, 0x0F, 0xA6, 0xC0\n" \
+ : "=c" (TEMP), "=a" (DUMMY) \
+ : "c" (TEMP), "a" (0), "S" (CONTEXT));
+ */
+
+ typedef struct { /* BIGNUM wrapper used by the PMM code; remembers where the digit buffer came from */
+ BIGNUM bignum; /* the number itself; bignum.d points either into `allocated` or at the digits of `source` */
+ /* begin "union" */
+ BN_ULONG *allocated; /* raw malloc() pointer backing bignum.d (released by PMMBN_free), NULL when the digits are borrowed */
+ BIGNUM *source; /* BIGNUM whose digit array is borrowed (set by BN_to_PMMBN when it is already 16-byte aligned), else NULL */
+ /* end "union" */
+ } PMMBN;
+
+ #define PMMBN_CTX_MAX 15 /* capacity of the temporary pool below */
+ typedef struct { /* fixed-size pool of temporaries handed out by PMMBN_CTX_get() */
+ unsigned max; /* number of slots of d[] handed out so far */
+ PMMBN d[PMMBN_CTX_MAX]; /* the slots themselves; slot i is valid once max > i */
+ } PMMBN_CTX;
+
+ PMMBN_CTX* PMMBN_CTX_new(); /* allocate an empty pool of temporaries */
+ PMMBN* PMMBN_CTX_get(PMMBN_CTX *ctx); /* hand out the next zero-filled temporary; NULL when ctx is NULL or the pool is exhausted */
+ void PMMBN_CTX_free(PMMBN_CTX *ctx); /* release every handed-out temporary, then the pool itself */
+
+ void PMMBN_init(PMMBN *n); /* zero-fill *n */
+ void PMMBN_free(PMMBN *n); /* free the buffer owned by n (if any) and zero-fill *n */
+
+ PMMBN* pmmbn_wexpand(PMMBN *n, int words); /* move n onto a freshly malloc'ed, 16-byte-aligned buffer when `words` exceeds bignum.dmax; the first `top` words are preserved */
+ #define pmmbn_set_top(N,NEWTOP) do { /* set the word count (bignum.top) of N to NEWTOP */ \
+ PMMBN *pmmbn_st = (N); /* each macro argument is evaluated exactly once */ \
+ int pmmbn_st_newtop = (NEWTOP); \
+ /* printf("pmmbn_set_top %p %i at %s %i\n", pmmbn_st, pmmbn_st_newtop, __PRETTY_FUNCTION__, __LINE__); /* */ \
+ if (pmmbn_st->bignum.top < pmmbn_st_newtop) { /* growing */ \
+ if (pmmbn_st->bignum.dmax < pmmbn_st_newtop) pmmbn_wexpand(pmmbn_st, pmmbn_st_newtop); /* NOTE(review): the result of pmmbn_wexpand is not checked here */ \
+ memset(pmmbn_st->bignum.d+pmmbn_st->bignum.top, 0, sizeof(BN_ULONG)*(pmmbn_st_newtop - pmmbn_st->bignum.top)); /* zero the newly exposed high words */ \
+ pmmbn_st->bignum.top = pmmbn_st_newtop; \
+ } else { /* shrinking or unchanged: the digit words themselves are left untouched */ \
+ pmmbn_st->bignum.top = pmmbn_st_newtop; \
+ } \
+ } while (0)
+
+ #define PMMBN_zero(N) ((N)->bignum.top=0) /* value 0 is represented as "no words in use"; the buffer is not touched */
+ #define PMMBN_one(N) do { /* set N to the value 1 */ \
+ PMMBN *pmmbn1 = (N); \
+ pmmbn_set_top(pmmbn1,1); /* make the number exactly one word long (allocating if needed) */ \
+ pmmbn1->bignum.d[0] = 1; \
+ } while (0)
+
+ int PMMBN_ucmp(const PMMBN *a, const PMMBN *b); /* compare magnitudes, ignoring leading zero words: <0, 0 or >0 */
+ int PMMBN_copy(PMMBN *to, const PMMBN *from); /* copy the used words, sign, top and flags of `from` into `to` */
+ int PMMBN_usub(PMMBN *r, const PMMBN *a, const PMMBN *b); /* r = a - b on magnitudes */
+ int PMMBN_print_fp(FILE *fptr, const PMMBN *n); /* hex dump, most significant word first */
+
+ BIGNUM* PMMBN_to_BN(PMMBN *n); /* strip leading zero words and return the embedded BIGNUM (no copy is made) */
+ int BN_to_PMMBN(BIGNUM *from, PMMBN *to); /* borrow from's digits when they are 16-byte aligned, otherwise copy them into an aligned buffer */
+
+ int PMM_mont_mul(PMMBN *r, PMMBN *a, PMMBN *b, PMMBN *m, BN_MONT_CTX *ctx); /* one hardware Montgomery multiplication of a and b modulo m into r */
+
+ static BN_CTX *s_ctx; // DEBUG: set to the BN_CTX of the most recent padlock_BN_mod_exp_mont() call; no live code reads it
+
+ /*
+  * Modular exponentiation rr = a^p mod m driven by the PadLock Montgomery
+  * multiplier: convert the inputs to aligned PMMBNs, run a bit-by-bit
+  * square-and-multiply loop on the hardware, convert the result back.
+  *
+  *   rr       receives the result
+  *   a, p, m  base, exponent and (odd) modulus
+  *   ctx      scratch BN_CTX; bracketed with BN_CTX_start/BN_CTX_end here
+  *   in_mont  Montgomery context for m, or NULL to build a temporary one
+  *
+  * Returns 1 on success and 0 on failure (even modulus, allocation failure,
+  * or a failing sub-step). Moduli longer than 32768 bits are handed to the
+  * software BN_mod_exp().
+  *
+  * The loop is not constant-time; a warning is printed when the exponent
+  * carries BN_FLG_CONSTTIME, but the computation still goes ahead.
+  */
+ static int padlock_BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
+ const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
+ {
+ int i,bits,ret=0;
+ BIGNUM *aa;
+ BN_MONT_CTX *mont=NULL;
+ PMMBN_CTX *pctx=NULL;
+ PMMBN *RR,*base,*basebar,*one,*xbar,*rv,*mod,*tmp;
+
+ s_ctx = ctx;
+
+ if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
+ {
+ fprintf(stderr,"BN_FLG_CONSTTIME unsupported in PMM, security uncertain\n");
+ // return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
+ }
+
+ if (m->top > 32768/32) { // too big
+ return BN_mod_exp(rr, a, p, m, ctx);
+ }
+
+ bn_check_top(a);
+ bn_check_top(p);
+ bn_check_top(m);
+
+ if (!BN_is_odd(m))
+ {
+ BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
+ return(0);
+ }
+ bits=BN_num_bits(p);
+ if (bits == 0)
+ {
+ /* a^0 == 1 */
+ ret = BN_one(rr);
+ return ret;
+ }
+
+ BN_CTX_start(ctx);
+
+ /* If this is not done, things will break in the montgomery
+  * part */
+
+ if (in_mont != NULL)
+ mont=in_mont;
+ else
+ {
+ if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
+ if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
+ }
+
+ /* Reduce the base into [0, m) when necessary. */
+ if (a->neg || BN_ucmp(a,m) >= 0)
+ {
+ aa = BN_CTX_get(ctx);
+ if (aa == NULL || !BN_nnmod(aa,a,m,ctx))
+ goto err;
+ }
+ else
+ aa=(BIGNUM*)a;
+ if (BN_is_zero(aa))
+ {
+ BN_zero(rr);
+ ret = 1;
+ goto err;
+ }
+
+ // begin pmm
+ pctx = PMMBN_CTX_new();
+ if (pctx == NULL) goto err;
+ RR = PMMBN_CTX_get(pctx);
+ base = PMMBN_CTX_get(pctx);
+ basebar = PMMBN_CTX_get(pctx);
+ one = PMMBN_CTX_get(pctx);
+ xbar = PMMBN_CTX_get(pctx);
+ rv = PMMBN_CTX_get(pctx);
+ mod = PMMBN_CTX_get(pctx);
+ if (!RR || !base || !basebar || !one || !xbar || !rv || !mod) goto err;
+
+ PMMBN_one(one);
+ if (!BN_to_PMMBN(&mont->RR,RR)) goto err;
+ if (!BN_to_PMMBN(aa,base)) goto err;
+ if (!BN_to_PMMBN((BIGNUM*)m,mod)) goto err;
+
+ /* Move the base and the constant 1 into the Montgomery domain. */
+ if (!PMM_mont_mul(basebar,base,RR,mod,mont)) goto err;
+ if (!PMM_mont_mul(xbar,one,RR,mod,mont)) goto err;
+
+ /* Left-to-right square-and-multiply. Starting at the top significant
+  * bit skips squarings of the Montgomery form of 1, which leave it
+  * unchanged. */
+ for (i=bits-1; i >= 0; i--) {
+ if (!PMM_mont_mul(rv,xbar,xbar,mod,mont)) goto err;
+ tmp = xbar;
+ xbar = rv;
+ rv = tmp;
+ if (BN_is_bit_set(p,i)) {
+ if (!PMM_mont_mul(rv,basebar,xbar,mod,mont)) goto err;
+ tmp = xbar;
+ xbar = rv;
+ rv = tmp;
+ }
+ }
+
+ /* Multiplying by plain 1 leaves the Montgomery domain. */
+ if (!PMM_mont_mul(rv,xbar,one,mod,mont)) goto err;
+ if (BN_copy(rr, PMMBN_to_BN(rv)) == NULL) goto err;
+ // end pmm
+
+ ret=1;
+ err:
+ PMMBN_CTX_free(pctx); /* accepts NULL */
+ if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
+ BN_CTX_end(ctx);
+ bn_check_top(rr);
+ return(ret);
+ }
+
+ /*
+  * Allocate an empty pool of PMMBN temporaries.
+  * Returns the new pool, or NULL when memory is exhausted. Release it with
+  * PMMBN_CTX_free().
+  */
+ PMMBN_CTX* PMMBN_CTX_new()
+ {
+ PMMBN_CTX *out = malloc(sizeof *out);
+ if (out == NULL) return NULL;
+ out->max = 0; /* nothing handed out yet; slots are initialised on demand */
+ return out;
+ }
+ /*
+  * Hand out the next unused temporary of the pool, zero-initialised.
+  * Returns NULL when ctx is NULL or when all PMMBN_CTX_MAX slots are in
+  * use (a diagnostic is written to stderr in the latter case).
+  */
+ PMMBN* PMMBN_CTX_get(PMMBN_CTX *ctx)
+ {
+ PMMBN *slot = NULL;
+
+ if (ctx != NULL) {
+ if (ctx->max < PMMBN_CTX_MAX) {
+ slot = &ctx->d[ctx->max];
+ ctx->max += 1;
+ PMMBN_init(slot);
+ } else {
+ fprintf(stderr, "Too many PMMBN_CTX temporary variables\n");
+ }
+ }
+ return slot;
+ }
+ /*
+  * Release every temporary that was handed out from the pool, then the
+  * pool itself. A NULL ctx is ignored.
+  */
+ void PMMBN_CTX_free(PMMBN_CTX *ctx)
+ {
+ int slot;
+
+ if (ctx == NULL) return;
+
+ slot = 0;
+ while (slot < ctx->max) {
+ PMMBN_free(&ctx->d[slot]);
+ slot++;
+ }
+ ctx->max = 0;
+ free(ctx);
+ }
+
+ /* Put *n into the empty state: every byte of the structure is cleared. */
+ void PMMBN_init(PMMBN *n)
+ {
+ memset(n, 0, sizeof *n);
+ }
+ /*
+  * Release the buffer owned by n (if any) and return n to the empty state.
+  * A borrowed source BIGNUM is deliberately not freed.
+  */
+ void PMMBN_free(PMMBN *n)
+ {
+ BN_ULONG *owned = n->allocated;
+
+ free(owned);
+ memset(n, 0, sizeof(PMMBN)); /* same effect as PMMBN_init(n) */
+ }
+
+ /*
+  * Make sure n can hold at least `words` words in a 16-byte-aligned buffer.
+  *
+  * Nothing happens when the recorded capacity (bignum.dmax) already
+  * suffices. Otherwise a new buffer is malloc'ed, the first `top` words are
+  * copied over, any previously owned buffer is freed and n stops borrowing
+  * from its source BIGNUM.
+  *
+  * Returns n, or NULL when n is NULL or the allocation fails (n is left
+  * unchanged in that case).
+  */
+ PMMBN* pmmbn_wexpand(PMMBN *n, int words)
+ {
+ BN_ULONG *a2;
+ BN_ULONG *d2;
+
+ if (n == NULL) return NULL;
+ if (words <= n->bignum.dmax) return n;
+
+ /* 16 spare bytes leave room to round the start up to a 16-byte boundary */
+ a2 = (BN_ULONG*)malloc(sizeof(BN_ULONG)*words + 16/*bytes*/);
+ if (a2 == NULL) return NULL;
+ d2 = (BN_ULONG*)( ((size_t)a2+15) & ~(size_t)15 );
+
+ if (n->bignum.d) memcpy(d2, n->bignum.d, n->bignum.top*sizeof(BN_ULONG));
+ free(n->allocated);
+
+ n->bignum.d = d2;
+ n->bignum.dmax = words; /* capacity in words of the new buffer */
+ n->allocated = a2;
+ n->source = NULL;
+
+ return n;
+ }
+
+ /*
+  * Load the BIGNUM `from` into the PMMBN `to`.
+  *
+  * When from's digit array is already 16-byte aligned it is borrowed: the
+  * BIGNUM header is copied and both share the same digits. Otherwise the
+  * used words are copied into an aligned buffer owned by `to`, with sign
+  * and flags cleared.
+  *
+  * Returns 1 on success, 0 when an argument is NULL or memory runs out.
+  */
+ int BN_to_PMMBN(BIGNUM *from, PMMBN *to)
+ {
+ BN_ULONG *aligned;
+
+ if (from == NULL || to == NULL) return 0;
+
+ aligned = (BN_ULONG*)( ((size_t)from->d+15) & ~(size_t)15 );
+ if (aligned == from->d) {
+ free(to->allocated); /* do not leak a buffer `to` owned before */
+ to->source = from;
+ to->allocated = NULL;
+ memcpy(&to->bignum,from,sizeof(BIGNUM));
+ } else {
+ if (to->source != NULL) {
+ /* `to` was borrowing digits: forget them so the copy below
+  * cannot land in somebody else's buffer */
+ to->bignum.d = NULL;
+ to->bignum.dmax = 0;
+ to->source = NULL;
+ }
+ to->bignum.top = 0; /* nothing worth preserving while expanding */
+ if (pmmbn_wexpand(to, from->dmax) == NULL) return 0;
+ to->bignum.top = from->top;
+ to->bignum.neg = 0;
+ to->bignum.flags = 0;
+ if (from->top > 0)
+ memcpy(to->bignum.d, from->d, sizeof(BN_ULONG)*from->top);
+ }
+ return 1;
+ }
+
+ /*
+  * Strip leading zero words from n and return a pointer to its embedded
+  * BIGNUM. No copy is made: the result is valid only as long as n is.
+  * A value of zero ends up with top == 0. Returns NULL when n is NULL.
+  */
+ BIGNUM* PMMBN_to_BN(PMMBN *n)
+ {
+ if (n == NULL) return NULL;
+ /* the top > 0 guard keeps an all-zero number from reading d[-1] */
+ while (n->bignum.top > 0 && n->bignum.d[n->bignum.top-1] == 0) n->bignum.top--;
+ return &n->bignum;
+ }
+ /*
+  * Compare the magnitudes of a and b, ignoring leading zero words.
+  * Returns a negative value, 0 or a positive value for a < b, a == b and
+  * a > b. NULL sorts below every number; two NULLs compare equal.
+  */
+ int PMMBN_ucmp(const PMMBN *a, const PMMBN *b)
+ {
+ int as, bs, i;
+
+ if (a == NULL && b == NULL) return 0;
+ if (a != NULL && b == NULL) return 1;
+ if (a == NULL && b != NULL) return -1;
+
+ /* significant lengths; the > 0 guards keep zero from reading d[-1] */
+ as = a->bignum.top;
+ bs = b->bignum.top;
+ while (as > 0 && a->bignum.d[as-1] == 0) as--;
+ while (bs > 0 && b->bignum.d[bs-1] == 0) bs--;
+ if (as != bs) {
+ return as-bs;
+ }
+
+ /* same length: decide on the most significant differing word, looking
+  * only at words both numbers are known to have */
+ for (i = as-1; i >= 0; i--) {
+ const BN_ULONG an = a->bignum.d[i];
+ const BN_ULONG bn = b->bignum.d[i];
+ if (an == bn) continue;
+ return (an > bn) ? 1 : -1;
+ }
+ return 0;
+ }
+
+ /*
+  * Copy the used words, sign and flags of `from` into `to`, resizing `to`
+  * to the same word count. Always returns 1.
+  */
+ int PMMBN_copy(PMMBN *to, const PMMBN *from)
+ {
+ const int words = from->bignum.top;
+
+ pmmbn_set_top(to, words);
+ memcpy(to->bignum.d, from->bignum.d, words*sizeof(BN_ULONG));
+ to->bignum.top = words;
+ to->bignum.flags = from->bignum.flags;
+ to->bignum.neg = from->bignum.neg;
+ return 1;
+ }
+
+ /*
+  * Magnitude subtraction r = a - b. r may be the same object as a.
+  *
+  * The result keeps a's word count; it is not normalised. Returns 1 on
+  * success. Returns 0, after writing a diagnostic to stderr, when b is
+  * larger than a; r then holds the wrapped-around difference and must not
+  * be used.
+  */
+ int PMMBN_usub(PMMBN *r, const PMMBN *a, const PMMBN *b)
+ {
+ int i;
+ int rtop;
+ const int len = b->bignum.top;
+ BN_ULONG borrow = 0;
+
+ if (r != a) PMMBN_copy(r, a);
+ rtop = r->bignum.top;
+
+ for (i = 0; i < rtop; i++) {
+ BN_ULONG rw, bw;
+ if (i >= len && !borrow) break; /* nothing left to subtract */
+ rw = r->bignum.d[i];
+ bw = (i < len) ? b->bignum.d[i] : 0;
+ r->bignum.d[i] = rw - bw - borrow; /* unsigned arithmetic wraps as needed */
+ borrow = (rw < bw) || (borrow && rw == bw);
+ }
+
+ /* words of b above r's length must all be zero, otherwise b > a */
+ for (i = rtop; i < len; i++) {
+ if (b->bignum.d[i] != 0) borrow = 1;
+ }
+
+ if (borrow) {
+ fprintf(stderr, "Beyond top\n");
+ return 0;
+ }
+ return 1;
+ }
+
+ /*
+  * Write n to fptr as hexadecimal, most significant word first, every word
+  * zero-padded to its full width and preceded by '-' when n is negative.
+  * Returns 1, or 0 when either argument is NULL.
+  */
+ int PMMBN_print_fp(FILE *fptr, const PMMBN *n)
+ {
+ int i;
+
+ if (fptr == NULL || n == NULL) return 0;
+ if (n->bignum.neg) fprintf(fptr,"-");
+ for (i = n->bignum.top-1; i>=0; i--) {
+ /* '*' consumes an int and %lx an unsigned long: cast both so the
+  * arguments match the format on every ABI */
+ fprintf(fptr,"%0*lx",(int)(sizeof(BN_ULONG)*2),(unsigned long)n->bignum.d[i]);
+ }
+ return 1;
+ }
+
+ /*
+  * One Montgomery multiplication of a and b modulo m on the PadLock
+  * hardware (instruction bytes F3 0F A6 C0), followed by a final
+  * conditional subtraction of m.
+  *
+  *   r    receives the product; resized to the operand length + 128 bits
+  *   a,b  operands; zero-padded in place to the common operand length and
+  *        normalised again before returning
+  *   m    modulus; zero-padded in place to the common operand length
+  *   ctx  Montgomery context of m; only ctx->n0 is used
+  *
+  * The operand length is the largest of the three word counts, at least
+  * 256 bits, rounded up to a multiple of four words.
+  * NOTE(review): the hardware's Montgomery radix is presumably
+  * 2^(operand length); confirm it agrees with the RR stored in the
+  * BN_MONT_CTX when the modulus length is not a multiple of 128 bits.
+  *
+  * Returns 1 on success, 0 on a NULL argument or a failed final
+  * subtraction.
+  */
+ int PMM_mont_mul(PMMBN *r, PMMBN *a, PMMBN *b, PMMBN *m, BN_MONT_CTX *ctx)
+ {
+ struct {
+ pmm32u mZeroPrime;
+ pmm32u * A;
+ pmm32u * B;
+ pmm32u * T; // +128 bits
+ pmm32u * M;
+ pmm32u * scratch;
+ } impl;
+ pmm32u scratch[12]; // 8 + padding to 128-bit boundary
+ pmm32u dummy;
+ pmm32u bits;
+ word max;
+
+ if (r == NULL || a == NULL || b == NULL || m == NULL || ctx == NULL)
+ return 0;
+
+ max = 256/(sizeof(BN_ULONG)*8);
+ if (a->bignum.top > max) max = a->bignum.top;
+ if (b->bignum.top > max) max = b->bignum.top;
+ if (m->bignum.top > max) max = m->bignum.top;
+ max = (max+3)&~3 ; // align 128 bits, if sizeof(BN_ULONG)==4
+ pmmbn_set_top(a, max);
+ pmmbn_set_top(b, max);
+ pmmbn_set_top(m, max);
+
+ /* start from an all-zero result buffer */
+ PMMBN_zero(r);
+ pmmbn_set_top(r, max + 128/(sizeof(BN_ULONG)*8) );
+
+ memset(scratch,0,sizeof(scratch)); /* clear the whole array, not just its first 12 bytes */
+ impl.scratch = (pmm32u*)( ((size_t)scratch+15) & ~(size_t)15 );
+ impl.mZeroPrime = ctx->n0;
+ impl.A = a->bignum.d;
+ impl.B = b->bignum.d;
+ impl.M = m->bignum.d;
+ impl.T = r->bignum.d;
+ dummy = 0;
+ bits = max*sizeof(BN_ULONG)*8;
+
+ /* The instruction reads A, B, M and writes T and the scratch area
+  * through the pointers in impl, hence the "memory" clobber. */
+ #define MONTMUL(TEMP, DUMMY, CONTEXT) asm __volatile__ (".byte 0xF3, 0x0F, 0xA6, 0xC0\n" \
+ : "=c" (TEMP), "=a" (DUMMY) \
+ : "c" (TEMP), "a" (0), "S" (CONTEXT) \
+ : "memory");
+ MONTMUL(bits,dummy,&impl);
+ #undef MONTMUL
+
+ /* bring the result below m with a single subtraction */
+ if (PMMBN_ucmp(r, m) >= 0) {
+ if (!PMMBN_usub(r, r, m)) return 0;
+ }
+
+ /* drop the zero padding again */
+ PMMBN_to_BN(a);
+ PMMBN_to_BN(b);
+ PMMBN_to_BN(r);
+
+ return 1;
+ }
+
+ /* RSA, DSA and DH method registration, inspired by the cryptodev engine (eng_cryptodev.c) */
+ /* RSA */
+
+ #ifndef OPENSSL_NO_RSA
+
+ static RSA_METHOD padlock_rsa = { /* positional initializer; slot names below assume the OpenSSL 0.9.8 RSA_METHOD layout — TODO confirm against the rsa.h being built */
+ "padlock_rsa", /* name */
+ NULL, /* rsa_pub_enc — filled in by reg_rsa() */
+ NULL, /* rsa_pub_dec — filled in by reg_rsa() */
+ NULL, /* rsa_priv_enc — filled in by reg_rsa() */
+ NULL, /* rsa_priv_dec — filled in by reg_rsa() */
+ NULL, /* rsa_mod_exp — filled in by reg_rsa() */
+ padlock_BN_mod_exp_mont, /* bn_mod_exp: the only operation routed to the PadLock multiplier */
+ NULL, /* init — filled in by reg_rsa() */
+ NULL, /* finish — filled in by reg_rsa() */
+ 0, /* flags */
+ NULL, /* app_data */
+ 0, /* rsa_sign */
+ 0, /* rsa_verify */
+ NULL, /* rsa_keygen */
+ };
+
+ /*
+  * Complete padlock_rsa with the stock software implementation for every
+  * operation assigned below, then install it on the engine.
+  * Returns the result of ENGINE_set_RSA().
+  */
+ static int reg_rsa(ENGINE *e)
+ {
+ const RSA_METHOD *sw = RSA_PKCS1_SSLeay();
+
+ /* life-cycle hooks */
+ padlock_rsa.init = sw->init;
+ padlock_rsa.finish = sw->finish;
+
+ /* key operations */
+ padlock_rsa.rsa_mod_exp = sw->rsa_mod_exp;
+ padlock_rsa.rsa_priv_dec = sw->rsa_priv_dec;
+ padlock_rsa.rsa_priv_enc = sw->rsa_priv_enc;
+ padlock_rsa.rsa_pub_dec = sw->rsa_pub_dec;
+ padlock_rsa.rsa_pub_enc = sw->rsa_pub_enc;
+
+ return ENGINE_set_RSA(e,&padlock_rsa);
+ }
+
+ #endif
+
+ /* DSA */
+
+ #ifndef OPENSSL_NO_DSA
+
+ /*
+  * DSA_METHOD adapter: forwards to padlock_BN_mod_exp_mont(); the DSA key
+  * handle itself is not used.
+  */
+ static int padlock_dsa_mod_exp(DSA *dsa, BIGNUM *r, BIGNUM *a, const BIGNUM *p,
+ const BIGNUM *m, BN_CTX *ctx,
+ BN_MONT_CTX *m_ctx)
+ {
+ const int ok = padlock_BN_mod_exp_mont(r, a, p, m, ctx, m_ctx);
+
+ (void)dsa;
+ return ok;
+ }
+
+ static DSA_METHOD padlock_dsa = { /* positional initializer; slot names below assume the OpenSSL 0.9.8 DSA_METHOD layout — TODO confirm against the dsa.h being built */
+ "padlock_dsa", /* name */
+ NULL, /* dsa_do_sign — filled in by reg_dsa() */
+ NULL, /* dsa_sign_setup — filled in by reg_dsa() */
+ NULL, /* dsa_do_verify — filled in by reg_dsa() */
+ NULL, /* not sure what to put here, check dsa_ossl.c */ /* presumably the dsa_mod_exp slot; left NULL and not set by reg_dsa() */
+ padlock_dsa_mod_exp, /* bn_mod_exp: routed to the PadLock multiplier */
+ NULL, /* init — filled in by reg_dsa() */
+ NULL, /* finish — filled in by reg_dsa() */
+ 0, /* flags — filled in by reg_dsa() */
+ NULL, /* app_data — filled in by reg_dsa() */
+ NULL, /* dsa_paramgen — filled in by reg_dsa() */
+ NULL, /* dsa_keygen — filled in by reg_dsa() */
+ };
+
+ /*
+  * Complete padlock_dsa with the stock software implementation for every
+  * member assigned below, then install it on the engine.
+  * Returns the result of ENGINE_set_DSA().
+  */
+ static int reg_dsa(ENGINE *e)
+ {
+ const DSA_METHOD *sw = DSA_OpenSSL();
+
+ /* life-cycle hooks and bookkeeping */
+ padlock_dsa.init = sw->init;
+ padlock_dsa.finish = sw->finish;
+ padlock_dsa.flags = sw->flags;
+ padlock_dsa.app_data = sw->app_data;
+
+ /* signing and verification */
+ padlock_dsa.dsa_do_sign = sw->dsa_do_sign;
+ padlock_dsa.dsa_sign_setup = sw->dsa_sign_setup;
+ padlock_dsa.dsa_do_verify = sw->dsa_do_verify;
+
+ /* parameter and key generation */
+ padlock_dsa.dsa_paramgen = sw->dsa_paramgen;
+ padlock_dsa.dsa_keygen = sw->dsa_keygen;
+
+ return ENGINE_set_DSA(e,&padlock_dsa);
+ }
+
+ #endif
+
+ /* DH */
+
+ #ifndef OPENSSL_NO_DH
+
+ /*
+  * DH_METHOD adapter: forwards to padlock_BN_mod_exp_mont(); the DH key
+  * handle itself is not used.
+  */
+ static int padlock_dh_bn_mod_exp(const DH *dh, BIGNUM *r,
+ const BIGNUM *a, const BIGNUM *p,
+ const BIGNUM *m, BN_CTX *ctx,
+ BN_MONT_CTX *m_ctx)
+ {
+ const int ok = padlock_BN_mod_exp_mont(r, a, p, m, ctx, m_ctx);
+
+ (void)dh;
+ return ok;
+ }
+
+ static DH_METHOD padlock_dh = { /* positional initializer; slot names below assume the OpenSSL 0.9.8 DH_METHOD layout — TODO confirm against the dh.h being built */
+ "padlock_dh", /* name */
+ NULL, /* generate_key — filled in by reg_dh() */
+ NULL, /* compute_key — filled in by reg_dh() */
+ padlock_dh_bn_mod_exp, /* bn_mod_exp: routed to the PadLock multiplier */
+ NULL, /* init — filled in by reg_dh() */
+ NULL, /* finish — filled in by reg_dh() */
+ 0, /* flags — filled in by reg_dh() */
+ NULL, /* app_data — filled in by reg_dh() */
+ NULL, /* generate_params — filled in by reg_dh() */
+ };
+
+ /*
+  * Complete padlock_dh with the stock software implementation for every
+  * member assigned below, then install it on the engine.
+  * Returns the result of ENGINE_set_DH().
+  */
+ static int reg_dh(ENGINE *e)
+ {
+ const DH_METHOD *sw = DH_OpenSSL();
+
+ /* life-cycle hooks and bookkeeping */
+ padlock_dh.init = sw->init;
+ padlock_dh.finish = sw->finish;
+ padlock_dh.flags = sw->flags;
+ padlock_dh.app_data = sw->app_data;
+
+ /* key agreement and parameter generation */
+ padlock_dh.generate_key = sw->generate_key;
+ padlock_dh.compute_key = sw->compute_key;
+ padlock_dh.generate_params = sw->generate_params;
+
+ return ENGINE_set_DH(e,&padlock_dh);
+ }
+
+ #endif
+
#endif /* COMPILE_HW_PADLOCK */
#endif /* !OPENSSL_NO_HW_PADLOCK */
0 Comments:
Post a Comment
Subscribe to Post Comments [Atom]
<< Home