diff --git a/bin/encoder.o b/bin/encoder.o index 2d66894c23caf0e68bae302f3dbb767e7abab111..93f732aa58952280e9321ea171eec27dab42d68c 100644 Binary files a/bin/encoder.o and b/bin/encoder.o differ diff --git a/bin/ntt.o b/bin/ntt.o index 29792475d4283efe7e25daa09b58554e1707f568..ff7369a84e538a9bab531fb8eeda61cd4c2042db 100644 Binary files a/bin/ntt.o and b/bin/ntt.o differ diff --git a/cuda/bin/ciphertext.o b/cuda/bin/ciphertext.o index 1580a51b7f93fb54a277011c86dcc99d618d4511..0dbbf06bc3fccbf33e11c2f14e319f5a3ee5f607 100644 Binary files a/cuda/bin/ciphertext.o and b/cuda/bin/ciphertext.o differ diff --git a/cuda/bin/ckks.o b/cuda/bin/ckks.o index 9f27a6dc1bbf9649000510f97701b3f18856fee6..1f6d1b586906392db61a50a2cf6a28a707706ecd 100644 Binary files a/cuda/bin/ckks.o and b/cuda/bin/ckks.o differ diff --git a/cuda/bin/evalkey.o b/cuda/bin/evalkey.o index 0a421c59846a55e67e31b07ed0a2dd8355212af2..9ad9294ce903a9120383f94e83cf1b1adfd81dc5 100644 Binary files a/cuda/bin/evalkey.o and b/cuda/bin/evalkey.o differ diff --git a/cuda/bin/ntt.o b/cuda/bin/ntt.o index d27e85ab1b9dc1e1cf10aa98ac7e9aa7c9406a1a..e3479d5295d83abe5e251fde0df36dfdbd863867 100644 Binary files a/cuda/bin/ntt.o and b/cuda/bin/ntt.o differ diff --git a/cuda/bin/parallel.o b/cuda/bin/parallel.o index 234ae9b2240064456b6c9238a3dc960e0723571d..ca5019f77d0231e6cee231b0ebb7eed377c64fc8 100644 Binary files a/cuda/bin/parallel.o and b/cuda/bin/parallel.o differ diff --git a/cuda/bin/polynomial.o b/cuda/bin/polynomial.o index 3f3e4a8de9a7f0237b2050621639ae91ddb7fc2e..226795151d3eb1e578bbb56c34e8890eb8e77654 100644 Binary files a/cuda/bin/polynomial.o and b/cuda/bin/polynomial.o differ diff --git a/cuda/bin/pubkey.o b/cuda/bin/pubkey.o index c85cfc0bae7cff1d80e94b823755b80e80e88cb4..7af577e01f0b25ed2b04e15e0534fde8a5e1411e 100644 Binary files a/cuda/bin/pubkey.o and b/cuda/bin/pubkey.o differ diff --git a/cuda/bin/seckey.o b/cuda/bin/seckey.o index dececb994b5bcc125b327f05e1c465d451ed0c45..7a26dfbffd75e14bb1e892a68d112eef482e21bb 100644 Binary files a/cuda/bin/seckey.o and b/cuda/bin/seckey.o differ diff --git a/cuda/ckks.cu b/cuda/ckks.cu index 2570db100f31ada490311c8e285abe020f4469d6..ff47c80a5c9bdf28f501b9c586bb1bd9181e33bf 100644 --- a/cuda/ckks.cu +++ b/cuda/ckks.cu @@ -3,18 +3,16 @@ using namespace std; -CKKS::CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk){ - pk = _pk; - sk = _sk; +CKKS::CKKS(EvalKey _evk){ evk = _evk; - deg = N; ql = q0; for (int i=0; i<6; i++){ ql *= pl[i]; } } -Ciphertext CKKS::encrypt(Polynomial pt){ + +Ciphertext CKKS::encrypt(Polynomial pt, PubKey pk){ //Polynomial e = genE(pt.degree, 1.0); Polynomial c0 = (pt) + pk.b; Polynomial c1 = pk.a; @@ -23,7 +21,7 @@ Ciphertext CKKS::encrypt(Polynomial pt){ return ct; } -Polynomial CKKS::decrypt(Ciphertext ct){ +Polynomial CKKS::decrypt(Ciphertext ct, SecKey sk){ Polynomial pt = ct.c0 + (ct.c1 * sk.s); return pt.modCoeff(ql); } @@ -33,6 +31,20 @@ Ciphertext CKKS::add(Ciphertext ct1, Ciphertext ct2){ return out; } +Polynomial CKKS::fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w){ + vector<int64_t> pol1 = ntt.ntt(p1, deg, w); + vector<int64_t> pol2 = ntt.ntt(p2, deg, w); + + int64_t p = 4179340454199820289; + vector<int64_t> mult(deg, 0); + for(int i=0; i<deg; i++){ + mult.push_back(NTT::modMul(pol1[i], pol2[i], p)); + } + + Polynomial out = ntt.intt(mult, w); + return out; +} + Ciphertext CKKS::mult(Ciphertext ct1, Ciphertext ct2){ // Multiplication Polynomial d1 = (ct1.c0 * ct2.c0).modCoeff(ql); @@ -55,4 +67,18 @@ Ciphertext CKKS::mult(Ciphertext ct1, Ciphertext ct2){ Ciphertext out(c0.modCoeff(ql), c1.modCoeff(ql)); return out; +} + +int getNextPow(int deg){ + int out = 2; + while(out < deg){ + out <<= 1; + } + return out; +} + +Ciphertext CKKS::multNTT(Ciphertext ct1, Ciphertext ct2){ + level += 1; + Ciphertext out = mult(ct1, ct2); + return out; } \ No newline at end of file diff --git a/cuda/ckks.h b/cuda/ckks.h index d4ae662e15d8acff8b45cbf98d2f7cd24126286c..87edf3f33ecedbda476fe8f3e6b10f0708de0456 100644 --- a/cuda/ckks.h +++ b/cuda/ckks.h @@ -1,4 +1,5 @@ #include "polynomial.h" +#include "ntt.h" #include "pubkey.h" #include "seckey.h" #include "evalkey.h" @@ -8,22 +9,22 @@ #define CKKS_H class CKKS { - public: - int deg; + private: int level = 6; int q0 = 67; - int pl[6] = {61, 67, 71, 73, 79, 59}; + int pl[6] = {61, 67, 71, 73, 79, 67}; int64_t ql; - PubKey pk; - SecKey sk; EvalKey evk; + NTT ntt; - CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk); - Polynomial fastMult(Polynomial p1, Polynomial p2); + public: + CKKS(EvalKey _evk); + Polynomial fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w); Ciphertext add(Ciphertext ct1, Ciphertext ct2); Ciphertext mult(Ciphertext ct1, Ciphertext ct2); - Ciphertext encrypt(Polynomial pt); - Polynomial decrypt(Ciphertext ct); + Ciphertext multNTT(Ciphertext ct1, Ciphertext ct2); + Ciphertext encrypt(Polynomial pt, PubKey pk); + Polynomial decrypt(Ciphertext ct, SecKey sk); }; #endif \ No newline at end of file diff --git a/cuda/evalkey.cu b/cuda/evalkey.cu index c2fbe958b78cf9a526b7ceeb571bd9660881c4f2..7156c5c4ec09468d00e9e767bca1201bad7e6817 100644 --- a/cuda/evalkey.cu +++ b/cuda/evalkey.cu @@ -29,10 +29,10 @@ Polynomial EvalKey::genE(int degree, double var){ } void EvalKey::generateA(int degree, int64_t q){ - int64_t half_q = (p*q)/2; + int64_t half_q = (q)/2; random_device rd; mt19937 gen(rd()); - uniform_int_distribution<int64_t> dis(-half_q, 0); + uniform_int_distribution<int64_t> dis(0, half_q); double a_coeffs[degree]; for (int i=0; i<degree; i++){ @@ -45,9 +45,10 @@ void EvalKey::generateA(int degree, int64_t q){ void EvalKey::computeB(int q, Polynomial s){ Polynomial err = genE(a.degree, 1.0); Polynomial ev = (s * s).scaleCoeff(p); - Polynomial temp = ((a.scaleCoeff(-1.0)) * s) + ev + err; + Polynomial temp = ((a.scaleCoeff(-1.0)) * s) + ev; Polynomial res = temp.modCoeff(q * p); b = Polynomial(res.degree); b.setCoeffs(res.coeffs); + } \ No newline at end of file diff --git a/cuda/ntt.cu b/cuda/ntt.cu index b45b53496c9a26ad19908c5d9dfa7f1d38a141aa..7c89eb9cc4adfd21e5f5902a275252c2df222100 100644 --- a/cuda/ntt.cu +++ b/cuda/ntt.cu @@ -1,4 +1,7 @@ #include "ntt.h" +#include <chrono> + +using namespace std::chrono; __device__ int64_t devModExp(int64_t base, int64_t power, int64_t mod){ int64_t res = 1; @@ -108,7 +111,7 @@ int64_t NTT::genNthRoot(int mod, int n){ } void NTT::reverse(vector<int64_t> &in, int bitLen){ - for (int i=0; i<size(in); i++){ + for (int i=0; i<in.size(); i++){ int64_t revN = 0; for(int j=0; j<bitLen ; j++){ if ((i >> j) & 1){ @@ -145,8 +148,22 @@ __global__ void correctNeg(int64_t* points, int M){ if(points[idx] < 0) points[idx] += M; } +int64_t NTT::modMul(int64_t a, int64_t b, int64_t mod){ + int64_t res = 0; + a = a % mod; + while(b){ + if (b & 1){ + res = (res + a) % mod; + } + a = (a*2) % mod; + b >>= 1; + } + + return res; +} + void NTT::_ntt(vector<int64_t> &in, int64_t w){ - int N = size(in); + int N = in.size(); int nBit = bitLength(N) - 1; reverse(in, nBit); @@ -159,12 +176,19 @@ void NTT::_ntt(vector<int64_t> &in, int64_t w){ int64_t* points; cudaMalloc((void**)&points, N*sizeof(int64_t)); + + int block = 1; + int threadPerBlock = N/2; + if(N/2 > 1024){ + threadPerBlock = 1024; + block = ceil((double)(N/2)/1024.0); + } for(int i=0; i<nBit; i++){ int64_t* p1; cudaMalloc((void**)&p1, N*sizeof(int64_t)); - computeNTT<<<1, N/2>>>(p1, N, i, w, nBit, input); + computeNTT<<<block, threadPerBlock>>>(p1, N, i, w, nBit, input); cudaDeviceSynchronize(); cudaMemcpy(points, p1, N*sizeof(int64_t), cudaMemcpyDeviceToDevice); @@ -176,10 +200,13 @@ void NTT::_ntt(vector<int64_t> &in, int64_t w){ cudaFree(p1); } - correctNeg<<<1, N>>>(points, 2013265921); + correctNeg<<<block*2, threadPerBlock>>>(points, 2013265921); cudaMemcpy(temp, points, N*sizeof(int64_t), cudaMemcpyDeviceToHost); - for(int i=0; i<N; i++) in[i] = temp[i]; + for(int i=0; i<N; i++) { + in[i] = temp[i]; + } + } vector<int64_t> NTT::ntt(Polynomial in, int degree, int64_t w){ @@ -191,21 +218,22 @@ vector<int64_t> NTT::ntt(Polynomial in, int degree, int64_t w){ for(int i=0; i<in.degree; i++){ out[i] = (int64_t) input[i]; } - + _ntt(out, w); return out; } Polynomial NTT::intt(vector<int64_t> &in, int w){ - int N = size(in); + int N = in.size(); double* coeff = (double*) malloc(N*sizeof(double)); int64_t wInv = modInv(w, M); int64_t nInv = modInv(N, M); _ntt(in, wInv); - for(int i=0; i<size(in); i++){ - coeff[i] = (in[i] * nInv) % M; + + for(int i=0; i<in.size(); i++){ + coeff[i] = modMul(in[i], nInv, M); } Polynomial pOut(N, coeff); diff --git a/cuda/ntt.h b/cuda/ntt.h index 5078f1f82c2edb248ad84898dd70fc22fb95d45c..809cc3ee9dcc7b7f365156c075b2673f39afd83d 100644 --- a/cuda/ntt.h +++ b/cuda/ntt.h @@ -4,7 +4,7 @@ using namespace std; -#define size(c) ((int)(c).size()) + #ifndef NTT_H #define NTT_H @@ -12,15 +12,19 @@ using namespace std; class NTT{ public: int M = 2013265921; + + vector<int64_t> ntt(Polynomial in, int degree, int64_t w); + Polynomial intt(vector<int64_t> &in, int w); + static int64_t modMul(int64_t a, int64_t b, int64_t mod); + int64_t genNthRoot(int mod, int n); + + private: int64_t modExp(int64_t base, int64_t power, int64_t mod); int64_t modInv(int64_t x, int64_t mod); int bitLength(int x); void reverse(vector<int64_t> &in, int bitLen); bool existSmallerN(int r, int mod, int n); - int64_t genNthRoot(int mod, int n); void _ntt(vector<int64_t> &in, int64_t w); - vector<int64_t> ntt(Polynomial in, int degree, int64_t w); - Polynomial intt(vector<int64_t> &in, int w); }; diff --git a/cuda/parallel.cu b/cuda/parallel.cu index 29d587b9e64c28b0b32d26a4729b96d8ecb586f0..ced1c066d04e4693987ee47c5469fdf99e9965d5 100644 --- a/cuda/parallel.cu +++ b/cuda/parallel.cu @@ -30,9 +30,9 @@ __device__ cuDoubleComplex power(cuDoubleComplex base, int power){ __global__ void initVandermonde(cuDoubleComplex *vand, int M, cuDoubleComplex xi){ int ROW = blockIdx.y*blockDim.y+threadIdx.y; int COL = blockIdx.x*blockDim.x+threadIdx.x; + int exp = (2*ROW+1)*COL; vand[ROW * M + COL] = power(xi, exp); - if(ROW == 0 && COL == 0) printf("test = %f\n", cuCreal(power(xi, exp))); } __global__ void transpose(cuDoubleComplex *dest, cuDoubleComplex *source, int N){ @@ -67,6 +67,23 @@ __global__ void computeCoord(double *res, cuDoubleComplex *target, cuDoubleCompl res[idx] = temp; } +__global__ void computeLargeCoord(double *res, cuDoubleComplex *target, cuDoubleComplex xi, int M){ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + double temp; + + cuDoubleComplex a = make_cuDoubleComplex(0.0,0.0); + cuDoubleComplex b = make_cuDoubleComplex(0.0,0.0); + for(int i=0; i<M; i++){ + int exp = (2*i+1) * idx; + cuDoubleComplex vandBase = power(xi, exp); + a = cuCadd(a, cuCmul(vandBase, cuConj(target[i]))); + b = cuCadd(b, cuCmul(vandBase, cuConj(vandBase))); + } + + temp = cuCreal(a)/cuCreal(b); + res[idx] = temp; +} + __global__ void setup_kernel(curandState *state){ int idx = threadIdx.x+blockDim.x*blockIdx.x; curand_init(1234, idx, 0, &state[idx]); @@ -83,12 +100,23 @@ __global__ void unscale(double *target, double scale){ } -__global__ void sigma(double *pol, cuDoubleComplex *vand, int M, cuDoubleComplex *target){ +__global__ void sigma(double *pol, cuDoubleComplex *vand, int M, int offset, cuDoubleComplex *target){ int idx = threadIdx.x + blockIdx.x*blockDim.x; target[idx] = make_cuDoubleComplex(0.0, 0.0); + for(int i=0; i<M; i++){ - target[idx].x += vand[idx*M+i].x * pol[i]; - target[idx].y += vand[idx*M+i].y * pol[i]; + target[idx].x += vand[idx*offset+i].x * pol[i]; + target[idx].y += vand[idx*offset+i].y * pol[i]; + } +} + +__global__ void largeSigma(double *pol, cuDoubleComplex xi, int M, cuDoubleComplex *target){ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + target[idx] = make_cuDoubleComplex(0.0, 0.0); + for(int i=0; i<M; i++){ + cuDoubleComplex x = power(xi, (2*idx+1)*i); + target[idx].x += x.x * pol[i]; + target[idx].y += x.y * pol[i]; } } @@ -110,17 +138,20 @@ Encoder::Encoder(int Min, double s){ blocksPerGrid.y = ceil(double(N)/double(threadsPerBlock.y)); } - initVandermonde<<<blocksPerGrid, threadsPerBlock>>>(vand, M/2, xi); - cudaDeviceSynchronize(); + if(M/4 <= pow(2,12)){ + initVandermonde<<<blocksPerGrid, threadsPerBlock>>>(vand, M/2, xi); + cudaDeviceSynchronize(); - dcomplex *test = (dcomplex *) malloc(64*64*sizeof(dcomplex)); - cudaMemcpy(test, vand, 64*64*sizeof(dcomplex), cudaMemcpyDeviceToHost); + dcomplex *test = (dcomplex *) malloc(64*64*sizeof(dcomplex)); + cudaMemcpy(test, vand, 64*64*sizeof(dcomplex), cudaMemcpyDeviceToHost); - cudaMalloc((void**)&sigmaRBasis, N*N*sizeof(cuDoubleComplex)); - transpose<<<blocksPerGrid, threadsPerBlock>>>(sigmaRBasis, vand, M/2); + cudaMalloc((void**)&sigmaRBasis, N*N*sizeof(cuDoubleComplex)); + transpose<<<blocksPerGrid, threadsPerBlock>>>(sigmaRBasis, vand, M/2); + } } Polynomial Encoder::encode(dcomparr input){ + cuDoubleComplex *target; size_t size = M/2; Polynomial out(M/2); @@ -128,26 +159,37 @@ Polynomial Encoder::encode(dcomparr input){ cudaMalloc((void**) &target, size * sizeof(cuDoubleComplex)); cudaMemcpy(target, input.arr, M/4 * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); + auto start = high_resolution_clock::now(); + int block = 1; int threadPerBlock = M/2; if(threadPerBlock > 1024){ threadPerBlock = 1024; - block = ceil((double)(M/2)/1024.0); + block = ceil((double)(M/2)/1024.0); } - auto start = high_resolution_clock::now(); expScale<<<block,threadPerBlock/2>>>(target, M/2, scale); double *res; cudaMalloc((void**)&res, M/2*sizeof(double)); - computeCoord<<<block,threadPerBlock>>>(res, target, sigmaRBasis, M/2); - coordWRR<<<block,threadPerBlock>>>(res); + if(M/4 <= pow(2,12)){ + computeCoord<<<block,threadPerBlock>>>(res, target, sigmaRBasis, M/2); + }else { + root = exp((2*M_PI/M) * J); + cuDoubleComplex xi = make_cuDoubleComplex(real(root), imag(root)); + computeLargeCoord<<<block,threadPerBlock>>>(res, target, xi, M/2); + } + coordWRR<<<block,threadPerBlock>>>(res); out.setCoeffs(res); + auto stop = high_resolution_clock::now(); auto duration = duration_cast<microseconds>(stop - start); - cout << "Encoding: " << duration.count() << endl; + // cout << "Encoding: " << duration.count() << endl; + + cudaFree(res); + cudaFree(target); return out; } @@ -155,7 +197,7 @@ dcomparr Encoder::decode(Polynomial pt){ auto start = high_resolution_clock::now(); int block = 1; - int threadPerBlock = M/2; + int threadPerBlock = pt.degree; if(threadPerBlock > 1024){ threadPerBlock = 1024; block = ceil((double)(M/2)/1024.0); @@ -165,15 +207,56 @@ dcomparr Encoder::decode(Polynomial pt){ cuDoubleComplex *target; cudaMalloc((void**)&target, M/4 * sizeof(cuDoubleComplex)); - sigma<<<block,threadPerBlock/2>>>(pt.coeffs, vand, M/2, target); + + if(M/4 <= pow(2,12)){ + if(pt.degree == M/2){ + sigma<<<block,threadPerBlock/2>>>(pt.coeffs, vand, M/2, M/2, target); + } else { + int R = 2; + while (R < pt.degree){ + R <<=1; + } + + cuDoubleComplex *tempVand; + cudaMalloc((void**)&tempVand, R*R*sizeof(cuDoubleComplex)); + + cuDoubleComplex xi = make_cuDoubleComplex(real(root), imag(root)); + dim3 threads(R, R); + dim3 blocksPerGrid(1, 1); + + if (R*R > 512){ + threads.x = 16; + threads.y = 16; + blocksPerGrid.x = ceil(double(R)/double(threads.x)); + blocksPerGrid.y = ceil(double(R)/double(threads.y)); + } + + initVandermonde<<<blocksPerGrid, threads>>>(tempVand, R, xi); + + threadPerBlock = M/2; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(M/2)/1024.0); + } + + sigma<<<block,threadPerBlock/2>>>(pt.coeffs, tempVand, pt.degree, R, target); + cudaFree(tempVand); + } + + }else { + cuDoubleComplex xi = make_cuDoubleComplex(real(root), imag(root)); + largeSigma<<<block,threadPerBlock/2>>>(pt.coeffs, xi, M/2, target); + } + + auto stop = high_resolution_clock::now(); + auto duration = duration_cast<microseconds>(stop - start); + // cout << "Decoding: " << duration.count() << endl; dcomparr output(M/4); size_t size_out = (M/4)*sizeof(dcomplex); cudaMemcpy(output.arr, target, size_out, cudaMemcpyDeviceToHost); - auto stop = high_resolution_clock::now(); - auto duration = duration_cast<microseconds>(stop - start); - cout << "Decoding: " << duration.count() << endl; + cudaFree(target); + return output; } - diff --git a/cuda/polynomial.cu b/cuda/polynomial.cu index 8f5944cddeb19130950fb94caabe2bb79b0bd82b..bf07b52b47aa26f186336fef3ebeb18aa988a562 100644 --- a/cuda/polynomial.cu +++ b/cuda/polynomial.cu @@ -31,11 +31,11 @@ __global__ void polyRoundScale(double *res, double *in, double scale){ res[idx] = (double) round(in[idx] * scale); } -__global__ void polyMult(double *res, double *a, double *b, int deg){ +__global__ void polyMult(double *res, double *a, double *b, int degA, int degB){ int idx = threadIdx.x; - res[idx] = 0; - for(int i=0; i<deg; i++){ - for(int j=0; j<deg; j++){ + res[idx] = 0.0; + for(int i=0; i<degA; i++){ + for(int j=0; j<degB; j++){ if(j+i == idx){ res[idx] += a[i] * b[j]; } @@ -62,7 +62,15 @@ Polynomial Polynomial::operator + (Polynomial const &obj){ int deg = max(obj.degree, degree); double* resCoeff; cudaMalloc((void**)&resCoeff, deg*sizeof(double)); - polyAdd<<<1,deg>>>(coeffs, degree, obj.coeffs, obj.degree, resCoeff); + + int block = 1; + int threadPerBlock = deg; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(deg)/1024.0); + } + + polyAdd<<<block,threadPerBlock>>>(coeffs, degree, obj.coeffs, obj.degree, resCoeff); Polynomial out(deg); out.setCoeffs(resCoeff); @@ -70,12 +78,21 @@ Polynomial Polynomial::operator + (Polynomial const &obj){ } Polynomial Polynomial::operator * (Polynomial const &obj){ - int deg = obj.degree + degree; + int deg = obj.degree + degree-1; double *resCoeff; cudaMalloc((void**)&resCoeff, deg*sizeof(double)); - polyMult<<<1, deg>>>(resCoeff, coeffs, obj.coeffs, deg); + + int block = 1; + int threadPerBlock = deg; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(deg)/1024.0); + } + + polyMult<<<block,threadPerBlock>>>(resCoeff, coeffs, obj.coeffs, degree, obj.degree); Polynomial out(deg); out.setCoeffs(resCoeff); + return out; } @@ -87,7 +104,15 @@ Polynomial Polynomial::modCoeff(int q){ Polynomial out(degree); double *resCoeff; cudaMalloc((void**)&resCoeff, degree*sizeof(double)); - polyMod<<<1, degree>>>(resCoeff, coeffs, q); + + int block = 1; + int threadPerBlock = degree; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(degree)/1024.0); + } + + polyMod<<<block,threadPerBlock>>>(resCoeff, coeffs, q); out.setCoeffs(resCoeff); return out; } @@ -96,7 +121,15 @@ Polynomial Polynomial::scaleCoeff(double scale){ Polynomial out(degree); double *resCoeff; cudaMalloc((void**)&resCoeff, degree*sizeof(double)); - polyScale<<<1, degree>>>(resCoeff, coeffs, scale); + + int block = 1; + int threadPerBlock = degree; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(degree)/1024.0); + } + + polyScale<<<block,threadPerBlock>>>(resCoeff, coeffs, scale); out.setCoeffs(resCoeff); return out; } @@ -105,7 +138,15 @@ Polynomial Polynomial::scaleRoundCoeff(double scale){ Polynomial out(degree); double *resCoeff; cudaMalloc((void**)&resCoeff, degree*sizeof(double)); - polyRoundScale<<<1, degree>>>(resCoeff, coeffs, scale); + + int block = 1; + int threadPerBlock = degree; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(degree)/1024.0); + } + + polyRoundScale<<<block,threadPerBlock>>>(resCoeff, coeffs, scale); out.setCoeffs(resCoeff); return out; } diff --git a/cuda/pubkey.cu b/cuda/pubkey.cu index 9e9dbab090d5adcfa98b2332e63f9bcb37821bf4..580da10540557638e777a30c4ed422176373f28e 100644 --- a/cuda/pubkey.cu +++ b/cuda/pubkey.cu @@ -30,7 +30,7 @@ void PubKey::generateA(int degree, int64_t q){ int64_t half_q = q/2; random_device rd; mt19937 gen(rd()); - uniform_int_distribution<int64_t> dis(-half_q, 0); + uniform_int_distribution<int64_t> dis(0, half_q); double a_coeffs[degree]; for (int i=0; i<degree; i++){ @@ -41,9 +41,10 @@ void PubKey::generateA(int degree, int64_t q){ void PubKey::computeB(int q, Polynomial s){ Polynomial err = genE(a.degree, 1.0); - Polynomial temp = (a.scaleCoeff(-1.0)) * s + err; + Polynomial temp = (a.scaleCoeff(-1.0)) * s; Polynomial res = temp.modCoeff(q); + b = Polynomial(res.degree); b.setCoeffs(res.coeffs); } diff --git a/cuda/run b/cuda/run index 63310b5b9d6a41736086824680060af5cae67974..35cba99a8edbc6cf26fd1bd7d15e950ff0030216 100755 Binary files a/cuda/run and b/cuda/run differ diff --git a/cuda/seckey.cu b/cuda/seckey.cu index d0efd3915d2130c7dc3c2ea2a5065ce2c084e7b3..1636c661e35b5b2e81791b2f179ddc6f21ca7485 100644 --- a/cuda/seckey.cu +++ b/cuda/seckey.cu @@ -4,15 +4,22 @@ #include <random> #include "seckey.h" +#include <chrono> +using namespace std::chrono; + + using namespace std; SecKey::SecKey(int deg){ double coeff[deg]; + auto start1 = high_resolution_clock::now(); std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution<> distr(deg/2, deg); + + int h = distr(gen); string ones(h, '1'); string zeros((deg-h), '0'); @@ -21,9 +28,20 @@ SecKey::SecKey(int deg){ random_shuffle(key.begin(), key.end()); for(int i=0; i<deg; i++){ - char temp = key.at(i); - coeff[i] = (double) (atoi(&temp)); + char temp = key[i]; + int coef = (int(temp)-48); + + if(coef != 0){ + coeff[i] = (double) -1 * coef; + } else { + coeff[i] = 0.0; + } + } + auto stop1 = high_resolution_clock::now(); + auto duration1 = duration_cast<microseconds>(stop1 - start1); + cout << "secret key: " << duration1.count() << endl; + s = Polynomial(deg, coeff); } \ No newline at end of file diff --git a/cuda/temp.cu b/cuda/temp.cu new file mode 100644 index 0000000000000000000000000000000000000000..292e6429246ee2097f7f04312e5a8418188920ef --- /dev/null +++ b/cuda/temp.cu @@ -0,0 +1,298 @@ + // Encoder enc(128, 128.0); + // dcomplex a = 1.0 + 1.0 *J; + // dcomparr input; + + + + int q0 = 67; + int pl[6] = {61, 67, 71, 73, 79, 61}; + int64_t ql = q0; + for (int i=0; i<5; i++){ + ql *= pl[i]; + } + + int N = pow(2,1); + double scale = 64.0; + Encoder enc(4*N, scale); + dcomplex a1 = 3.0 + 4.0 *J; + dcomplex a2 = 2.0 - 1.0 *J; + dcomparr input(N); + input.arr[0] = a1; + input.arr[1] = a2; + + dcomparr input2(N); + input2.arr[0] = 1.0 + 1.0*J; + input2.arr[1] = 1.0 + 2.0*J; + + Polynomial pt1 = enc.encode(input2); + // pt1.printPol(); + + auto start = high_resolution_clock::now(); + Polynomial pt = enc.encode(input); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast<microseconds>(stop - start); + pt.printPol(); + + // dcomparr output = enc.decode(pt); + // for(int i=0; i<N; i++){ + // cout << output.arr[i] << " "; + // } + // cout << endl; + + // double realN = 0; + // double imagN = 0; + + // for(int i=0; i<N; i++){ + // realN += pow(abs(real(output.arr[i]) - real(input.arr[i])), 2); + // imagN += pow(abs(imag(output.arr[i]) - imag(input.arr[i])), 2); + // } + + // double average = (realN+imagN)/N; + // average = pow(average, 0.5); + // cout << average << endl; + + // dcomparr output = enc.decode(pt); + SecKey sk(N); + + auto start2 = high_resolution_clock::now(); + PubKey pk(sk.s, (N/2)+1, ql); + auto stop2 = high_resolution_clock::now(); + auto duration2 = duration_cast<microseconds>(stop2 - start2); + // cout << "public key: " << duration2.count() << endl; + + auto start3 = high_resolution_clock::now(); + EvalKey evk(sk.s, (N/2)+1, ql); + auto stop3 = high_resolution_clock::now(); + auto duration3 = duration_cast<microseconds>(stop3 - start3); + // cout << "eval key: " << 2*duration2.count() << endl; + + CKKS ckks(evk); + + auto start4 = high_resolution_clock::now(); + Ciphertext ct = ckks.encrypt(pt, pk); + auto stop4 = high_resolution_clock::now(); + auto duration4 = duration_cast<microseconds>(stop4 - start4); + cout << "encrypt: " << duration4.count() << endl; + + // Ciphertext ct1 = ckks.encrypt(pt1, pk); + + // auto start5 = high_resolution_clock::now(); + // Polynomial ptOut = ckks.decrypt(ct, sk); + // auto stop5 = high_resolution_clock::now(); + // auto duration5 = duration_cast<microseconds>(stop5 - start5); + // cout << "decrypt: " << duration5.count() << endl; + // ct.c0.printPol(); + + // auto start6 = high_resolution_clock::now(); + // Ciphertext ctadd = ckks.add(ct, ct1); + // auto stop6 = high_resolution_clock::now(); + // auto duration6 = duration_cast<microseconds>(stop6 - start6); + // cout << "add: " << duration6.count() << endl; + + // ctadd.c0.printPol(); + + // Polynomial ptOut = ckks.decrypt(ctadd, sk); + // ptOut.printPol(); + + + // auto start7 = high_resolution_clock::now(); + // Ciphertext ctmult = ckks.multNTT(ct, ct); + Ciphertext ctmultNorm = ckks.mult(ct, ct); + // auto stop7 = high_resolution_clock::now(); + // auto duration7 = duration_cast<microseconds>(stop7 - start7); + // cout << "mult: " << duration7.count() << endl; + + Polynomial multOut = ckks.decrypt(ctmultNorm, sk); + multOut.printPol(); + + dcomparr multOutput = enc.decode(pt); + for(int i=0; i<2; i++){ + cout << multOutput.arr[i] << endl; + } + + + // NTT ntt; + // int64_t w = ntt.genNthRoot(ntt.M, N); + + // auto start7 = high_resolution_clock::now(); + // vector<int64_t> nttRes = ntt.ntt(pt, N*2, w); + // auto stop7 = high_resolution_clock::now(); + // auto duration7 = duration_cast<microseconds>(stop7 - start7); + // cout << "full ntt: " << duration7.count() << endl; + + // auto start8 = high_resolution_clock::now(); + // Ciphertext ct1 = ckks.multNTT(ct,ct); + // auto stop8 = high_resolution_clock::now(); + // auto duration8 = duration_cast<microseconds>(stop8 - start8); + // cout << "mult ntt: " << duration8.count() << endl; + + // auto start9 = high_resolution_clock::now(); + // Polynomial nttOut = ntt.intt(nttRes1, w); + // Ciphertext ctmult = ckks.multNTT(ct, ct); + // auto stop9 = high_resolution_clock::now(); + // auto duration9 = duration_cast<microseconds>(stop9 - start9); + // cout << "intt: " << duration9.count() << endl; + + // int q0 = 67; + // int pl[6] = {61, 67, 71, 73, 79, 59}; + // int64_t ql = q0; + + // for (int i=0; i<3; i++){ + // ql *= pl[i]; + // } + + // SecKey sk = SecKey(2); + // PubKey pk(sk.s, 3, ql); + // EvalKey evk(sk.s, 3, ql); + + // CKKS ckks(4, pk, evk, sk); + // Ciphertext ct = ckks.encrypt(pt); + // ct.c0.printPol(); + // ct.c1.printPol(); + + // cout << endl; + // Polynomial ptout = ckks.decrypt(ct); + // ptout.printPol(); + + // cout << endl; + // Ciphertext ctadd = ckks.add(ct, ct); + // ctadd.c0.printPol(); + // ctadd.c1.printPol(); + + // cout << endl; + // Polynomial ptadd = ckks.decrypt(ctadd); + // ptadd.printPol(); + + // cout << endl; + // Ciphertext ctmult = ckks.mult(ct, ct); + // ctmult.c0.printPol(); + // ctmult.c1.printPol(); + + // cout << endl; + // Polynomial ptmult = ckks.decrypt(ctmult); + // ptmult.printPol(); + + // Encoder enc2(16, 64.0); + // dcomparr outputMult = enc2.decode(ptmult); + // for(int i=0; i<4; i++){ + // cout << outputMult.arr[i] << " "; + // } + // cout << endl; + + bool error = false; + // for(int i=1; i<5; i++){ + // cout << i << endl; + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + // dcomparr in = randomFill(N); + // Polynomial test = enc.encode(in); + // dcomparr out = enc.decode(test); + // for(int j=0; j<N; j++){ + // if (abs(in.arr[i]-out.arr[i]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + // for(int i=1; i<5; i++){ + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + + // SecKey sk = SecKey(N); + // PubKey pk(sk.s, (N)+1, ql); + // EvalKey evk(sk.s, (N)+1, ql); + // CKKS ckks(N, pk, evk, sk); + + // dcomparr in = randomFill(N); + + // Polynomial test = enc.encode(in); + // Ciphertext ct = ckks.encrypt(test); + // Polynomial testOut = ckks.decrypt(ct); + + // dcomparr out = enc.decode(testOut); + // for(int j=0; j<N; j++){ + // if (abs(in.arr[j]-out.arr[j]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + // for(int i=1; i<5; i++){ + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + + // SecKey sk = SecKey(N); + // PubKey pk(sk.s, (N)+1, ql); + // EvalKey evk(sk.s, (N)+1, ql); + // CKKS ckks(N, pk, evk, sk); + + // dcomparr in = randomFill(N); + + // Polynomial test = enc.encode(in); + // Ciphertext ct = ckks.encrypt(test); + // Ciphertext ctadd = ckks.add(ct,ct); + // Polynomial testOut = ckks.decrypt(ctadd); + + // dcomparr out = enc.decode(testOut); + // dcomplex scale(2.0, 0.0); + // for(int j=0; j<N; j++){ + // if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + // for(int i=1; i<5; i++){ + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + + // SecKey sk = SecKey(N); + // PubKey pk(sk.s, (N)+1, ql); + // EvalKey evk(sk.s, (N)+1, ql); + // CKKS ckks(N, pk, evk, sk); + + // dcomparr in = randomFill(N); + + // Polynomial test = enc.encode(in); + // Ciphertext ct = ckks.encrypt(test); + // Ciphertext ctadd = ckks.mult(ct,ct); + // Polynomial testOut = ckks.decrypt(ctadd); + + // Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0); + // dcomparr out = enc.decode(testOut); + // dcomparr check = enc.decode(control); + + // for(int j=0; j<N*2; j++){ + // if (abs(check.arr[j]-out.arr[j]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + // double input[] = {2.0, 1.0}; + // Polynomial in(2, input); + + // NTT ntt; + // vector<int64_t> out = ntt.ntt(in, 8, 1801542727); + + // for(int i=0; i<8; i++){ + // cout << out[i] << " "; + // } + // cout << endl; + + // Polynomial test = ntt.intt(out, 1801542727); + // test.printPol(); + + // if(error) { + // cout << "Error" << endl; + // } else { + // cout << "Finish" << endl; + // } \ No newline at end of file diff --git a/cuda/test.cu b/cuda/test.cu index 02c7f14708a07d30b73bca88a1ad4a20f118cea9..33a9a6e2c98a0492e7f1441212a7a8debbd70d34 100644 --- a/cuda/test.cu +++ b/cuda/test.cu @@ -14,171 +14,99 @@ using namespace std; using namespace std::chrono; -dcomparr randomFill(int n){ - dcomparr out(n); - for(int i=0; i<n; i++){ - out.arr[i] = 1.0 + 1.0*J; - } - return out; -} int main(){ - // Encoder enc(128, 128.0); - // dcomplex a = 1.0 + 1.0 *J; - // dcomparr input; - // for(int i=0; i<32; i++){ - // input.arr[i] = a; - // } - - // Encoder enc(8, 64.0); - // dcomplex a1 = 3.0 + 4.0 *J; - // dcomplex a2 = 2.0 - 1.0*J; - // dcomparr input(2); - // input.arr[0] = a1; - // input.arr[1] = a2; - - // Polynomial pt = enc.encode(input); - // pt.printPol(); - int q0 = 67; - int pl[6] = {61, 67, 71, 73, 79, 59}; + int pl[6] = {61, 67, 71, 73, 79, 61}; int64_t ql = q0; - for (int i=0; i<3; i++){ + for (int i=0; i<5; i++){ ql *= pl[i]; } - // SecKey sk = SecKey(4); - // PubKey pk(sk.s, 4, ql); - // EvalKey evk(sk.s, 4, ql); - - // cout << "decode" << endl; - // dcomparr out = enc.decode(pt); - // for(int i=0; i<2; i++){ - // cout << out.arr[i] << " "; - // } - // cout << endl; - - bool error = false; - // for(int i=1; i<5; i++){ - // cout << i << endl; - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); - // dcomparr in = randomFill(N); - // Polynomial test = enc.encode(in); - // dcomparr out = enc.decode(test); - // for(int j=0; j<N; j++){ - // if (abs(in.arr[i]-out.arr[i]) > 2.0){ - // error = true; - // break; - // } - // } - // if (error) break; - // } - - // for(int i=1; i<5; i++){ - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); - - // SecKey sk = SecKey(N); - // PubKey pk(sk.s, (N)+1, ql); - // EvalKey evk(sk.s, (N)+1, ql); - // CKKS ckks(N, pk, evk, sk); - - // dcomparr in = randomFill(N); - - // Polynomial test = enc.encode(in); - // Ciphertext ct = ckks.encrypt(test); - // Polynomial testOut = ckks.decrypt(ct); - - // dcomparr out = enc.decode(testOut); - // for(int j=0; j<N; j++){ - // if (abs(in.arr[j]-out.arr[j]) > 2.0){ - // error = true; - // break; - // } - // } - // if (error) break; - // } - - // for(int i=1; i<5; i++){ - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); - - // SecKey sk = SecKey(N); - // PubKey pk(sk.s, (N)+1, ql); - // EvalKey evk(sk.s, (N)+1, ql); - // CKKS ckks(N, pk, evk, sk); - - // dcomparr in = randomFill(N); - - // Polynomial test = enc.encode(in); - // Ciphertext ct = ckks.encrypt(test); - // Ciphertext ctadd = ckks.add(ct,ct); - // Polynomial testOut = ckks.decrypt(ctadd); - - // dcomparr out = enc.decode(testOut); - // dcomplex scale(2.0, 0.0); - // for(int j=0; j<N; j++){ - // if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){ - // error = true; - // break; - // } - // } - // if (error) break; - // } - - // for(int i=1; i<5; i++){ - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); - - // SecKey sk = SecKey(N); - // PubKey pk(sk.s, (N)+1, ql); - // EvalKey evk(sk.s, (N)+1, ql); - // CKKS ckks(N, pk, evk, sk); - - // dcomparr in = randomFill(N); - - // Polynomial test = enc.encode(in); - // Ciphertext ct = ckks.encrypt(test); - // Ciphertext ctadd = ckks.mult(ct,ct); - // Polynomial testOut = ckks.decrypt(ctadd); - - // Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0); - // dcomparr out = enc.decode(testOut); - // dcomparr check = enc.decode(control); - - // for(int j=0; j<N*2; j++){ - // if (abs(check.arr[j]-out.arr[j]) > 2.0){ - // error = true; - // break; - // } - // } - // if (error) break; - // } - - double input[] = {2.0, 1.0}; - Polynomial in(2, input); - - NTT ntt; - vector<int64_t> out = ntt.ntt(in, 8, 1801542727); - - for(int i=0; i<8; i++){ - cout << out[i] << " "; - } + Encoder enc(8, 64.0); + dcomparr input(2); + input.arr[0] = (3.0+4.0*J); + input.arr[1] = (2.0-1.0*J); + + dcomparr input2(2); + input2.arr[0] = (1.0+1.0*J); + input2.arr[1] = (1.0+1.0*J); + + cout << "Input: " << endl; + cout << "Input 1: "; + for(int i=0; i<2; i++) cout << input.arr[i] << " "; + cout << endl; + cout << "Input 2: "; + for(int i=0; i<2; i++) cout << input2.arr[i] << " "; + cout << endl; + cout << endl; + + + cout << "Encoding: " << endl; + cout << "Plaintext 1: "; + Polynomial pt1 = enc.encode(input); + pt1.printPol(); + cout << "Plaintext 2: "; + Polynomial pt2 = enc.encode(input2); + pt2.printPol(); cout << endl; - Polynomial test = ntt.intt(out, 1801542727); - test.printPol(); + SecKey sk(2); + PubKey pk(sk.s, 3, ql); + EvalKey evk(sk.s, 3, ql); - if(error) { - cout << "Error" << endl; - } else { - cout << "Finish" << endl; - } + CKKS ckks(evk); + Ciphertext ct1 = ckks.encrypt(pt1, pk); + Ciphertext ct2 = ckks.encrypt(pt2, pk); - return 0; + cout << "Encryption pt1: " << endl; + cout << "c0: "; ct1.c0.printPol(); + cout << "c1: "; ct1.c1.printPol(); + cout << endl; + + cout << "Ciphertext Addition : (ct1+ct2) " << endl; + Ciphertext ctAdd = ckks.add(ct1, ct2); + Polynomial addOut = ckks.decrypt(ctAdd, sk); + cout << "Pt Addition: "; addOut.printPol(); + dcomparr addRes = enc.decode(addOut); + cout << "Addition Result: "; + for(int i=0; i<2; i++) cout << addRes.arr[i] << " "; + cout << endl; + cout << endl; + + cout << "Ciphertext Multiplication: (ct1 * ct1) " << endl; + Ciphertext ctMult = ckks.mult(ct1, ct1); + Polynomial multOut = ckks.decrypt(ctMult, sk); + cout << "Pt multiplication: "; multOut.printPol(); + dcomparr multRes = enc.decode(multOut); + cout << "Multiplication Result: "; + for(int i=0; i<2; i++) cout << multRes.arr[i] << " "; + cout << endl; + cout << endl; + + cout << "Ciphertext Multiplication (NTT): (ct1 * ct1)" << endl; + Ciphertext ctMultNTT = ckks.multNTT(ct1, ct1); + Polynomial multNTTOut = ckks.decrypt(ctMultNTT, sk); + cout << "Pt multiplication (NTT): "; multNTTOut.printPol(); + dcomparr multNTTRes = enc.decode(multNTTOut); + cout << "Multiplication Result (NTT): "; + for(int i=0; i<2; i++) cout << multNTTRes.arr[i] << " "; + cout << endl; + cout << endl; + cout << "Decryption ct1: " << endl; + Polynomial decrypted1 = ckks.decrypt(ct1, sk); + cout << "Decryption result: "; decrypted1.printPol(); + cout << endl; + cout << "Decoding pt1: " << endl; + dcomparr output = enc.decode(decrypted1); + for(int i=0; i<2; i++){ + cout << output.arr[i] << " "; + } + cout << endl; + + return 0; } \ No newline at end of file diff --git a/cuda/test.o b/cuda/test.o index c7f0951f33be0ac829b0362f8519d12d225d8c76..941694f57231a98e5068e912fb55178b4b2b9e7f 100644 Binary files a/cuda/test.o and b/cuda/test.o differ diff --git a/exp.sh b/exp.sh new file mode 100755 index 0000000000000000000000000000000000000000..6aefbda4dcd16851aeffc971587924a279155f26 --- /dev/null +++ b/exp.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +./test.sh > ./res/exp1.txt +./test.sh > ./res/exp2.txt +./test.sh > ./res/exp3.txt +./test.sh > ./res/exp4.txt +./test.sh > ./res/exp5.txt diff --git a/run b/run index 56659c8159d83dbcac387195f701f5049ebd2ed3..bef6614c785e44a535b0e678de67abc8e3d502f1 100755 Binary files a/run and b/run differ diff --git a/src/ckks.cpp b/src/ckks.cpp index 3714399e92ba451608e83190b3cc44aed63d555d..fb2b43f8df1275fae460c1deccb8ce7d1985ab62 100644 --- a/src/ckks.cpp +++ b/src/ckks.cpp @@ -103,7 +103,7 @@ Ciphertext CKKS::encrypt(Polynomial pt, PubKey pk){ Polynomial CKKS::decrypt(Ciphertext ct, SecKey sk){ Polynomial pt = ct.c0 + (ct.c1 * sk.s); - return pt.modCoeff(ql); + return pt; } Ciphertext CKKS::add(Ciphertext ct1, Ciphertext ct2){ @@ -145,7 +145,7 @@ int getNextPow(int deg){ Ciphertext CKKS::multNTT(Ciphertext ct1, Ciphertext ct2){ int deg = getNextPow(ct1.c0.degree+ct2.c0.degree-1); - int64_t w = ntt.genNthRoot(ntt.M, deg); //ntt.genNthRoot(ntt.M, deg); + int64_t w = 1592366214; //ntt.genNthRoot(ntt.M, deg); Polynomial d1 = fastMult(ct1.c0, ct2.c0, deg, w).modCoeff(ql); Polynomial d2 = (fastMult(ct1.c0, ct2.c1,deg, w) + fastMult(ct2.c0, ct1.c1,deg, w)).modCoeff(ql); diff --git a/src/ckks.h b/src/ckks.h index eae9a6463e18f067b6981b0c592fe15a36288487..a252e7ceb4d2d51b1862490afa9635cef4c51b79 100644 --- a/src/ckks.h +++ b/src/ckks.h @@ -25,11 +25,12 @@ class CKKS { Ciphertext mult(Ciphertext ct1, Ciphertext ct2); Ciphertext multNTT(Ciphertext ct1, Ciphertext ct2); Ciphertext add(Ciphertext ct1, Ciphertext ct2); + Polynomial fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w); private: Polynomial genE(int degree, double var); Polynomial genZO(); - Polynomial fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w); + }; diff --git a/src/encoder.cpp b/src/encoder.cpp index 5b26c755451ebe4fa16a216f20ddce1355f38d3e..ee612476e3f45330c1dc199a072bd0f40c96e62b 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -7,6 +7,8 @@ #include <chrono> +int counter = 0; + using namespace std; using namespace std::chrono; @@ -16,6 +18,8 @@ dcomplex vdot(vector<dcomplex> a, vector<dcomplex> b, int size, int offset){ for(int i=0; i<size; i++){ out += b[i+offset] * conj(a[i]); } + // counter++; + // cout << counter << endl; return out; } @@ -38,10 +42,24 @@ Encoder::Encoder(int in, double inScale){ M = in; scale = inScale; root = exp((2*M_PI/M) * J); - vandermonde = vector<dcomplex>((M/2)*(M/2)); - sigmaRBasis = vector<dcomplex>((M/2)*(M/2)); - initVandermonde(root); - VandermondeTranspose(); + if(M/4 <= pow(2,12)){ + vandermonde = Eigen::MatrixXcd((M/2),(M/2)); + sigmaRBasis = Eigen::MatrixXcd((M/2),(M/2)); + initVandermonde(root); + VandermondeTranspose(); + } else { + sigmaBase = Eigen::VectorXcd(M/2); + vanderBase = Eigen::VectorXcd(M/2); + + for(int j=0; j<M/2; j++){ + int power = (2*j+1); + sigmaBase(j) = pow(root, power); + } + + for(int j=0; j<M/2; j++){ + vanderBase(j) = pow(root, j); + } + } } void Encoder::initVandermonde(dcomplex xi){ @@ -49,7 +67,7 @@ void Encoder::initVandermonde(dcomplex xi){ for(int i=0; i<N; i++){ for(int j=0; j<N; j++){ int power = (2*i+1) * j; - vandermonde[i*N+j] = pow(xi, power); + vandermonde(i,j) = pow(xi, power); } } } @@ -58,7 +76,7 @@ void Encoder::VandermondeTranspose(){ int N = M/2; for(int i=0; i<N; i++){ for(int j=0; j<N; j++){ - sigmaRBasis[j*N+i] = vandermonde[i*N+j]; + sigmaRBasis(j,i) = vandermonde(i,j); } } } @@ -81,20 +99,20 @@ dcomparr Encoder::sigma(Polynomial pol){ dcomparr out(pol.degree/2); for(int i=0; i<N; i++){ for (int j=0; j<M/2; j++){ - out.arr[i] += pol.coeffs[j] * vandermonde[i*M/2+j]; + out.arr[i] += pol.coeffs[j] * vandermonde(i, j); } } return out; }else{ int R = 2; + pol.reduceDeg(); while (R < pol.degree){ R <<=1; } - - dcomparr out(R/2); + dcomparr out(M/4); vector<dcomplex> tempVandermonde = vector<dcomplex>(R*R); for(int i=0; i<R; i++){ @@ -105,7 +123,7 @@ dcomparr Encoder::sigma(Polynomial pol){ } } - for(int i=0; i<R/2; i++){ + for(int i=0; i<N; i++){ for (int j=0; j<R; j++){ if(j<pol.degree){ out.arr[i] += pol.coeffs[j] * tempVandermonde[i*R+j]; @@ -118,21 +136,66 @@ dcomparr Encoder::sigma(Polynomial pol){ } +dcomparr Encoder::largeSigma(Polynomial pol){ + int N = M/4; + pol.reduceDeg(); + + int R = 2; + while (R < pol.degree){ + R <<=1; + } + + dcomparr out(R/2); + for(int i=0; i<R/2; i++){ + for (int j=0; j<R; j++){ + if(j<pol.degree){ + out.arr[i] += pol.coeffs[j] * pow(vanderBase(j), (2*i+1)); + } + } + } + + return out; +} + dcomparr Encoder::decode(Polynomial pol){ auto start = high_resolution_clock::now(); Polynomial unscaled = pol.scaleCoeff(1.0/scale); - dcomparr out = sigma(unscaled); + + if(M/4<=pow(2, 12)){ + dcomparr out = sigma(unscaled); + return out; + } else { + dcomparr out = largeSigma(unscaled); + return out; + } +} + +dcomparr Encoder::computeCoordinate(dcomparr z){ + dcomparr out(M/2); + Eigen::VectorXcd temp = Eigen::Map<Eigen::VectorXcd, Eigen::Unaligned>(z.arr.data(), z.arr.size()); + for (int i=0; i<M/2; i++){ + Eigen::VectorXcd b = sigmaRBasis.row(i); + dcomplex zi = temp.dot(b) / b.dot(b); + out.arr[i] = real(zi); + } - auto stop = high_resolution_clock::now(); - auto duration = duration_cast<microseconds>(stop - start); - //cout << "Decoding: " << duration.count() << endl; return out; } -dcomparr Encoder::computeCoordinate(dcomparr z){ +Eigen::VectorXcd genB(int i, int N, Eigen::VectorXcd base){ + Eigen::VectorXcd b(N); + for(int j=0; j<N; j++){ + b(j) = pow(base(j), i); + } + return b; +} + +dcomparr Encoder::computeLargeCoordinate(dcomparr z){ dcomparr out(M/2); + Eigen::VectorXcd temp = Eigen::Map<Eigen::VectorXcd, Eigen::Unaligned>(z.arr.data(), z.arr.size()); for (int i=0; i<M/2; i++){ - dcomplex zi = vdot(z.arr, sigmaRBasis, M/2, i*M/2) / vdots(sigmaRBasis, sigmaRBasis, M/2, i*M/2); + Eigen::VectorXcd b = genB(i, M/2, sigmaBase); + dcomplex zi = temp.dot(b) / b.dot(b); out.arr[i] = real(zi); } @@ -155,9 +218,15 @@ dcomparr Encoder::coordinateWRR(dcomparr coordinates){ } dcomparr Encoder::discretization(dcomparr z){ - dcomparr coor = computeCoordinate(z); - dcomparr roundedCoor = coordinateWRR(coor); - return roundedCoor; + if(M/4 <= pow(2,12)){ + dcomparr coor = computeCoordinate(z); + dcomparr roundedCoor = coordinateWRR(coor); + return roundedCoor; + } else { + dcomparr coor = computeLargeCoordinate(z); + dcomparr roundedCoor = coordinateWRR(coor); + return roundedCoor; + } } Polynomial Encoder::sigmaInv(dcomparr z){ @@ -184,8 +253,6 @@ Polynomial Encoder::encode(dcomparr input){ dcomparr roundZ = discretization(zPi); Polynomial out = sigmaInv(roundZ); - // Polynomial out(2); - auto stop = high_resolution_clock::now(); auto duration = duration_cast<microseconds>(stop - start); //cout << "Encoding: " << duration.count() << endl; diff --git a/src/encoder.h b/src/encoder.h index 269cd9527b4b5f595ed7de440fd533e6878065fa..7a573e18cb0aaa49bdb7777d93065439f2d3b5ef 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -22,27 +22,32 @@ struct dcomparr { #define ENCODER_H class Encoder { + public: + Encoder(int in, double inScale); + Polynomial encode(dcomparr input); + dcomparr decode(Polynomial pol); + private: int M; double scale; dcomplex root; - vector<dcomplex> vandermonde; - vector<dcomplex> sigmaRBasis; + Eigen::VectorXcd vanderBase; + Eigen::VectorXcd sigmaBase; + Eigen::MatrixXcd vandermonde; + Eigen::MatrixXcd sigmaRBasis; void initVandermonde(dcomplex xi); void VandermondeTranspose(); dcomparr piInv(dcomparr input); dcomparr sigma(Polynomial pol); + dcomparr largeSigma(Polynomial pol); dcomparr computeCoordinate(dcomparr z); + dcomparr computeLargeCoordinate(dcomparr z); dcomparr coordinateWRR(dcomparr coordinates); dcomparr discretization(dcomparr z); Polynomial sigmaInv(dcomparr z); - - public: - Encoder(int in, double inScale); - Polynomial encode(dcomparr input); - dcomparr decode(Polynomial pol); + }; diff --git a/src/pubkey.cpp b/src/pubkey.cpp index 0b526951e93ca1befa25eae13f65899a29127e9f..5e2dd8c1c07c3b37bb4a322cf6ab6a22809d7783 100644 --- a/src/pubkey.cpp +++ b/src/pubkey.cpp @@ -44,7 +44,7 @@ void PubKey::generateA(int degree, int64_t q){ void PubKey::computeB(int q, Polynomial s){ Polynomial err = genE(a.degree, 1.0); - Polynomial temp = (a.scaleCoeff(-1.0)) * s ; + Polynomial temp = (a.scaleCoeff(-1.0)) * s; Polynomial res = temp.modCoeff(q); b = Polynomial(res.degree, res.coeffs); diff --git a/test.cpp b/test.cpp index e6e808199de421791a563d3070ff94ed4de663dd..593a0c439f8d5c2f91db2135a0514b69a23b0047 100644 --- a/test.cpp +++ b/test.cpp @@ -19,205 +19,45 @@ dcomparr randomFill(int n){ return out; } -int main(){ - // Encoder enc(8, 64.0); - // dcomplex a1 = 3.0 + 4.0 *J; - // dcomplex a2 = 2.0 - 1.0*J; - // dcomparr input(2); - // input.arr[0] = a1; - // input.arr[1] = a2; - - // // dcomparr input(128); - // // dcomplex a = 1.0 + 1.0*J; - // // for(int i=0; i<128; i++){ - // // input.arr[i] = a; - // // } - - // Polynomial pt = enc.encode(input); - // pt.printPol(); +int main(){ int q0 = 67; int pl[6] = {61, 67, 71, 73, 79, 61}; int64_t ql = q0; - for (int i=0; i<3; i++){ + for (int i=0; i<5; i++){ ql *= pl[i]; } - // SecKey sk = SecKey(2); - // // sk.s.printPol(); - // PubKey pk(sk.s, 3, ql); - // // cout << "pk: "; - // // pk.b.printPol(); - // EvalKey evk(sk.s, 3, ql); - // cout << "evk: "; - // evk.b.printPol(); + int N = pow(2,15); + int scale = pow(2,10); + Encoder enc(N*4, scale); + dcomparr input = randomFill(N); - // cout << "Control: "; - // Polynomial pt2 = pt * pt; - // pt2.scaleRoundCoeff(1.0/61.0).printPol(); - - // CKKS ckks(4, pk, evk, sk); - // Ciphertext ct = ckks.encrypt(pt); - - // auto start = high_resolution_clock::now(); - // Ciphertext ctadd = ckks.multNTT(ct, ct); - // auto stop = high_resolution_clock::now(); - // auto duration = duration_cast<microseconds>(stop - start); - //cout << "Normal Time: " << duration.count() << endl; - - // cout << "Experiment: "; - // Polynomial ptOut = ckks.decrypt(ctadd); - // ptOut.printPol(); - - bool error = false; - // for(int i=1; i<5; i++){ - // cout << i << endl; - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); - // dcomparr in = randomFill(N); - // Polynomial test = enc.encode(in); - // dcomparr out = enc.decode(test); - // for(int j=0; j<N; j++){ - // if (abs(in.arr[i]-out.arr[i]) > 2.0){ - // error = true; - // break; - // } - // } - // if (error) break; - // } - - // for(int i=1; i<5; i++){ - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); - - // SecKey sk = SecKey(N); - // PubKey pk(sk.s, (N)+1, ql); - // EvalKey evk(sk.s, (N)+1, ql); - // CKKS ckks(N, evk); - - // dcomparr in = randomFill(N); - - // Polynomial test = enc.encode(in); - // Ciphertext ct = ckks.encrypt(test, pk); - // Polynomial testOut = ckks.decrypt(ct, sk); - - // dcomparr out = enc.decode(testOut); - // for(int j=0; j<N; j++){ - // if (abs(in.arr[j]-out.arr[j]) > 2.0){ - // error = true; - // break; - // } - // } - // if (error) break; - // } - - for(int i=7; i<8; i++){ - int N = pow(2, i); - Encoder enc(N*4, 64.0); - - SecKey sk = SecKey(N); - PubKey pk(sk.s, (N)+1, ql); - EvalKey evk(sk.s, (N)+1, ql); - CKKS ckks(N, evk); + Polynomial pt1 = enc.encode(input); - dcomparr in = randomFill(N); - - Polynomial test = enc.encode(in); - Ciphertext ct = ckks.encrypt(test, pk); - Ciphertext ctadd = ckks.add(ct,ct); - Polynomial testOut = ckks.decrypt(ctadd, sk); + SecKey sk(N/2); + PubKey pk(sk.s, N/2, ql); + EvalKey evk(sk.s, N/2, ql); - dcomparr out = enc.decode(testOut); - dcomplex scale(2.0, 0.0); - for(int j=0; j<N; j++){ - if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){ - error = true; - break; - } - } - if (error) break; - } - - // for(int i=7; i<9; i++){ - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); - - // SecKey sk = SecKey(N); - // PubKey pk(sk.s, (N)+1, ql); - // EvalKey evk(sk.s, (N)+1, ql); - // CKKS ckks(N, pk, evk, sk); - - // dcomparr in = randomFill(N); - - // Polynomial test = enc.encode(in); - // Ciphertext ct = ckks.encrypt(test); - // Ciphertext ctadd = ckks.mult(ct,ct); - // Polynomial testOut = ckks.decrypt(ctadd); - - // Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0); - // dcomparr out = enc.decode(testOut); - // dcomparr check = enc.decode(control); - - // for(int j=0; j<N*2; j++){ - // if (abs(check.arr[j]-out.arr[j]) > 2.0){ - // ctadd.c0.printPol(); - // ctadd.c1.printPol(); - // error = true; - // break; - // } - // } - // if (error) break; - // } - - // for(int i=7; i<8; i++){ - // int N = pow(2, i); - // Encoder enc(N*4, 64.0); + CKKS ckks(4, evk); + Ciphertext ct1 = ckks.encrypt(pt1, pk); - // SecKey sk = SecKey(N); - // PubKey pk(sk.s, (N)+1, ql); - // EvalKey evk(sk.s, (N)+1, ql); - // CKKS ckks(N, evk); - - // dcomparr in = randomFill(N); - - // Polynomial test = enc.encode(in); - // Ciphertext ct = ckks.encrypt(test, pk); - // Ciphertext ctadd = ckks.multNTT(ct,ct); - // Polynomial testOut = ckks.decrypt(ctadd, sk); - - // Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0); - // dcomparr out = enc.decode(testOut); - // dcomparr check = enc.decode(control); - - // for(int j=0; j<N; j++){ - // if (abs(check.arr[j]-out.arr[j]) > 2.0){ - // error = true; - // break; - // } - // } - // if (error) break; - // } - - // int N = pow(2, 3); - // Encoder enc(N*4, 64.0); - // SecKey sk = SecKey(N); - // PubKey pk(sk.s, (N)+1, ql); - // EvalKey evk(sk.s, (N)+1, ql); - // CKKS ckks(N, pk, evk, sk); - // dcomparr in = randomFill(N); - - // Polynomial test = enc.encode(in); - // Ciphertext ct = ckks.encrypt(test); - // Ciphertext ctadd = ckks.multNTT(ct,ct); - // Polynomial testOut = ckks.decrypt(ctadd); - - - if(error) { - cout << "Error" << endl; - } else { - cout << "Finish" << endl; - } + NTT ntt; + int64_t w = ntt.genNthRoot(ntt.M, N*2); + auto start = high_resolution_clock::now(); + vector<int64_t> nttOut = ntt.ntt(pt1, N*2, w); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast<microseconds>(stop - start); + cout << "NTT Time: " << duration.count() << endl; + + auto start1 = high_resolution_clock::now(); + Polynomial output = ntt.intt(nttOut, w); + auto stop1 = high_resolution_clock::now(); + auto duration1 = duration_cast<microseconds>(stop1 - start1); + cout << "iNTT Time: " << duration1.count() << endl; + + return 0; } \ No newline at end of file diff --git a/test.o b/test.o index 7c653c930a6ede35e85d127617ac2ebc128c68ef..f8f4c7b994b082a708dbebdc56488d4c99b4d478 100644 Binary files a/test.o and b/test.o differ