diff --git a/cuda/.vscode/c_cpp_properties.json b/cuda/.vscode/c_cpp_properties.json new file mode 100644 index 0000000000000000000000000000000000000000..862aed8796dcd7cc17c0264d58b81e6348556d50 --- /dev/null +++ b/cuda/.vscode/c_cpp_properties.json @@ -0,0 +1,16 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "gnu17", + "cppStandard": "gnu++14", + "intelliSenseMode": "linux-gcc-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/cuda/.vscode/settings.json b/cuda/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..43f52f3f933b2c66b0fdbbf4b6c93bc6d1af6d7e --- /dev/null +++ b/cuda/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "files.associations": { + "iostream": "cpp", + "chrono": "cpp", + "ostream": "cpp" + } +} \ No newline at end of file diff --git a/cuda/bin/ciphertext.o b/cuda/bin/ciphertext.o new file mode 100644 index 0000000000000000000000000000000000000000..5189234927a1d5bb60dcd78fdb25d7e6166b3cfd Binary files /dev/null and b/cuda/bin/ciphertext.o differ diff --git a/cuda/bin/ckks.o b/cuda/bin/ckks.o new file mode 100644 index 0000000000000000000000000000000000000000..d9489959a078a0d0bb0cb622434126ffc14b2fe6 Binary files /dev/null and b/cuda/bin/ckks.o differ diff --git a/cuda/bin/evalkey.o b/cuda/bin/evalkey.o new file mode 100644 index 0000000000000000000000000000000000000000..a04a147bcd6515ab49919a770228ea700f2ecc86 Binary files /dev/null and b/cuda/bin/evalkey.o differ diff --git a/cuda/bin/parallel.o b/cuda/bin/parallel.o new file mode 100644 index 0000000000000000000000000000000000000000..0901e941e6dfc2ee2d3e62e149e0a842c6be92a6 Binary files /dev/null and b/cuda/bin/parallel.o differ diff --git a/cuda/bin/polynomial.o b/cuda/bin/polynomial.o new file mode 100644 index 0000000000000000000000000000000000000000..e4cf05596eee04751b84815c8cce5ef7ec5427eb Binary files /dev/null and b/cuda/bin/polynomial.o differ diff --git a/cuda/bin/pubkey.o b/cuda/bin/pubkey.o new file mode 100644 index 0000000000000000000000000000000000000000..ec6254a2069a800328fa0d7453c6c0a4d97c9bdf Binary files /dev/null and b/cuda/bin/pubkey.o differ diff --git a/cuda/bin/seckey.o b/cuda/bin/seckey.o new file mode 100644 index 0000000000000000000000000000000000000000..a1c6ec9d4839487bf2929a22e8413a9ef22409e9 Binary files /dev/null and b/cuda/bin/seckey.o differ diff --git a/cuda/ciphertext.cu b/cuda/ciphertext.cu new file mode 100644 index 0000000000000000000000000000000000000000..80460e0d1fb6cfcf3c3c27c59460070ac9374a2e --- /dev/null +++ b/cuda/ciphertext.cu @@ -0,0 +1,21 @@ +#include <complex> +#include <cmath> +#include <random> +#include "ciphertext.h" +#include <iostream> +using namespace std; + + +Ciphertext::Ciphertext(Polynomial _c0, Polynomial _c1){ + c0 = Polynomial(_c0.degree); + c0.setCoeffs(_c0.coeffs); + c1 = Polynomial(_c1.degree); + c1.setCoeffs(_c1.coeffs); +} + +Ciphertext Ciphertext:: operator + (Ciphertext const &obj){ + Polynomial out1 = c0 + obj.c0; + Polynomial out2 = c1 + obj.c1; + Ciphertext out(out1, out2); + return out; +} \ No newline at end of file diff --git a/cuda/ciphertext.h b/cuda/ciphertext.h new file mode 100644 index 0000000000000000000000000000000000000000..fe752a9ce9de7d360d273a33ccda6773cb73c80c --- /dev/null +++ b/cuda/ciphertext.h @@ -0,0 +1,15 @@ +#include "polynomial.h" + +#ifndef CIPHERTEXT_H +#define CIPHERTEXT_H + +class Ciphertext { + public: + Polynomial c0; + Polynomial c1; + + Ciphertext(Polynomial _c0, Polynomial _c1); + Ciphertext operator + (Ciphertext const &obj); +}; + +#endif \ No newline at end of file diff --git a/cuda/ckks.cu b/cuda/ckks.cu new file mode 100644 index 0000000000000000000000000000000000000000..2570db100f31ada490311c8e285abe020f4469d6 --- /dev/null +++ b/cuda/ckks.cu @@ -0,0 +1,58 @@ +#include "ckks.h" +#include <random> + +using namespace std; + +CKKS::CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk){ + pk = _pk; + sk = _sk; + evk = _evk; + deg = N; + ql = q0; + for (int i=0; i<6; i++){ + ql *= pl[i]; + } +} + +Ciphertext CKKS::encrypt(Polynomial pt){ + //Polynomial e = genE(pt.degree, 1.0); + Polynomial c0 = (pt) + pk.b; + Polynomial c1 = pk.a; + + Ciphertext ct(c0.modCoeff(ql), c1.modCoeff(ql)); + return ct; +} + +Polynomial CKKS::decrypt(Ciphertext ct){ + Polynomial pt = ct.c0 + (ct.c1 * sk.s); + return pt.modCoeff(ql); +} + +Ciphertext CKKS::add(Ciphertext ct1, Ciphertext ct2){ + Ciphertext out = ct1 + ct2; + return out; +} + +Ciphertext CKKS::mult(Ciphertext ct1, Ciphertext ct2){ + // Multiplication + Polynomial d1 = (ct1.c0 * ct2.c0).modCoeff(ql); + Polynomial d2 = ((ct1.c0 * ct2.c1) + (ct2.c0 * ct1.c1)).modCoeff(ql); + Polynomial d3 = (ct1.c1 * ct2.c1).modCoeff(ql); + + // Relin + Polynomial d3_0 = (d3 * evk.b).scaleRoundCoeff(1.0/1000.0); + Polynomial d3_1 = (d3 * evk.a).scaleRoundCoeff(1.0/1000.0); + + Polynomial outC0 = d1.modCoeff(ql) + d3_0.modCoeff(ql); + Polynomial outC1 = d2.modCoeff(ql) + d3_1.modCoeff(ql); + + // Rescale + ql = (double)ql / (double)pl[level-1]; + Polynomial c0 = outC0.scaleRoundCoeff(1.0/(double)pl[level-1]); + Polynomial c1 = outC1.scaleRoundCoeff(1.0/(double)pl[level-1]); + + level -= 1; + + Ciphertext out(c0.modCoeff(ql), c1.modCoeff(ql)); + return out; +} \ No newline at end of file diff --git a/cuda/ckks.h b/cuda/ckks.h new file mode 100644 index 0000000000000000000000000000000000000000..d4ae662e15d8acff8b45cbf98d2f7cd24126286c --- /dev/null +++ b/cuda/ckks.h @@ -0,0 +1,29 @@ +#include "polynomial.h" +#include "pubkey.h" +#include "seckey.h" +#include "evalkey.h" +#include "ciphertext.h" + +#ifndef CKKS_H +#define CKKS_H + +class CKKS { + public: + int deg; + int level = 6; + int q0 = 67; + int pl[6] = {61, 67, 71, 73, 79, 59}; + int64_t ql; + PubKey pk; + SecKey sk; + EvalKey evk; + + CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk); + Polynomial fastMult(Polynomial p1, Polynomial p2); + Ciphertext add(Ciphertext ct1, Ciphertext ct2); + Ciphertext mult(Ciphertext ct1, Ciphertext ct2); + Ciphertext encrypt(Polynomial pt); + Polynomial decrypt(Ciphertext ct); +}; + +#endif \ No newline at end of file diff --git a/cuda/compile.sh b/cuda/compile.sh new file mode 100755 index 0000000000000000000000000000000000000000..5ab0cdbcef0bc4c0fc02366bb512b7fc347a5383 --- /dev/null +++ b/cuda/compile.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +nvcc -c -o ./bin/ciphertext.o ciphertext.cu +nvcc -c -o ./bin/ckks.o ckks.cu +nvcc -c -o ./bin/parallel.o parallel.cu +nvcc -c -o ./bin/polynomial.o polynomial.cu +nvcc -c -o ./bin/pubkey.o pubkey.cu +nvcc -c -o ./bin/seckey.o seckey.cu +nvcc -c -o ./bin/evalkey.o evalkey.cu \ No newline at end of file diff --git a/cuda/evalkey.cu b/cuda/evalkey.cu new file mode 100644 index 0000000000000000000000000000000000000000..6c886a601b73311daf883854f9d3cc8d5b01d2a4 --- /dev/null +++ b/cuda/evalkey.cu @@ -0,0 +1,54 @@ +#include<iostream> +#include <complex> +#include <cmath> +#include <random> +#include "evalkey.h" + +using namespace std; + +EvalKey::EvalKey(Polynomial _s, int degree, int64_t q){ + a = Polynomial(degree); + b = Polynomial(degree); + p = 1000; + + generateA(4, 1000); + computeB(q, _s); +} + +Polynomial EvalKey::genE(int degree, double var){ + double errCoeffs[degree]; + + random_device rd; + mt19937 gen(rd()); + normal_distribution<double> dis(0, var); + + for(int i=0; i<degree; i++){ + errCoeffs[i] = (double) round(dis(gen)); + } + + Polynomial err(degree, errCoeffs); + return err; +} + +void EvalKey::generateA(int degree, int64_t q){ + int64_t half_q = (p*q)/2; + random_device rd; + mt19937 gen(rd()); + uniform_int_distribution<int64_t> dis(-half_q, 0); + double a_coeffs[degree]; + + for (int i=0; i<degree; i++){ + a_coeffs[i] = (double) dis(gen); + } + a.setCoeffs(a_coeffs); +} + +void EvalKey::computeB(int q, Polynomial s){ + Polynomial err = genE(a.degree, 1.0); + Polynomial ev = (s * s).scaleCoeff(p); + Polynomial temp = ((a.scaleCoeff(-1.0)) * s) + ev + err; + Polynomial res = temp.modCoeff(q * p); + + b.setDegree(res.degree); + b.setCoeffs(res.coeffs); +} \ No newline at end of file diff --git a/cuda/evalkey.h b/cuda/evalkey.h new file mode 100644 index 0000000000000000000000000000000000000000..d05eaa25eb22cf63e528b3065d805fceb5a5e96c --- /dev/null +++ b/cuda/evalkey.h @@ -0,0 +1,19 @@ +#include "polynomial.h" + +#ifndef EVALKEY_H +#define EVALKEY_H + +class EvalKey{ + public: + Polynomial a; + Polynomial b; + int64_t p; + + EvalKey() = default; + EvalKey(Polynomial _s, int degree, int64_t q); + Polynomial genE(int degree, double var); + void generateA(int degree, int64_t q); + void computeB(int q, Polynomial s); +}; + +#endif \ No newline at end of file diff --git a/cuda/ntt.cu b/cuda/ntt.cu new file mode 100644 index 0000000000000000000000000000000000000000..1d2ee8159869cedcc9ad7fe19a2df1a4a5c030a2 --- /dev/null +++ b/cuda/ntt.cu @@ -0,0 +1,154 @@ +#include "ntt.h" + +int64_t NTT::modExp(int64_t base, int64_t power, int64_t mod){ + int64_t res = 1; + int64_t p = power; + int64_t b = base % mod; + while (p > 0){ + if (p & 1){ + res = (res * b) % mod; + } + b = (b*b) % mod; + p = p >> 1; + } + + return res; +} + +int64_t NTT::modInv(int64_t x, int64_t mod){ + int64_t t = 0; + int64_t t1 = 1; + int64_t r = mod; + int64_t r1 = x; + + while (r1 != 0){ + int64_t quot = (int64_t) (r/r1); + int64_t temp_t = t; + int64_t temp_r = r; + t = t1; + t1 = (temp_t - quot * t1); + r = r1; + r1 = (temp_r % r1); + } + + if (t < 0){ + t = t + mod; + } + + return t; +} + +int NTT::bitLength(int x){ + int a = x; + int len = 0; + while (a!=0){ + a >>= 1; + len += 1; + } + return len; +} + +bool NTT::existSmallerN(int r, int mod, int n){ + for(int k=2; k<n; k++){ + if(modExp(r, k, mod) == 1){ + return true; + } + } + return false; +} + +int64_t NTT::genNthRoot(int mod, int n){ + int p = mod - 1; + int range = mod - 1 + 1; + while (true){ + int64_t a = rand() % range + 1; + int64_t b = modExp(a, p/n, mod); + if (!existSmallerN(b, mod, n)){ + return b; + } + } +} + +void NTT::reverse(vector<int64_t> &in, int bitLen){ + for (int i=0; i<size(in); i++){ + int64_t revN = 0; + for(int j=0; j<bitLen ; j++){ + if ((i >> j) & 1){ + revN |= 1 << (bitLen-1-j); + } + } + int64_t coeff = in[i]; + + if (revN > i){ + coeff ^= in[revN]; + in[revN] ^= coeff; + coeff ^= in[revN]; + in[i] = coeff; + } + } +} + +void NTT::_ntt(vector<int64_t> &in, int64_t w){ + int N = size(in); + int nBit = bitLength(N) - 1; + reverse(in, nBit); + + vector<int> points(N, 0); + for(int i=0; i<nBit; i++){ + vector<int64_t> p1; + vector<int64_t> p2; + for(int j=0; j<N/2; j++){ + int shift = nBit - i - 1; + int64_t P = (j >> shift) << shift; + int64_t wP = modExp(w, P, M); + int64_t odd = in[2*j+1] * wP; + int64_t even = in[2*j]; + p1.push_back((even + odd) % M); + p2.push_back((even - odd) % M); + } + + for(int k=0; k<N/2; k++){ + points[k] = p1[k]; + points[k+N/2] = p2[k]; + } + + if(1!=nBit){ + for(int k=0; k<N; k++){ + in[k] = points[k]; + } + } + } + + for(int k=0; k<N; k++){ + in[k] = points[k]; + if(in[k] < 0){ + in[k] += M; + } + } +} + +vector<int64_t> NTT::ntt(Polynomial in, int degree, int64_t w){ + vector<int64_t> out(degree,0); + for(int i=0; i<in.degree; i++){ + out[i] = (int64_t) in.coeffs[i]; + } + _ntt(out, w); + return out; +} + +Polynomial NTT::intt(vector<int64_t> &in, int w){ + int N = size(in); + Polynomial pOut(N); + double* coeff = (double*) malloc(N*sizeof(double)); + + int64_t wInv = modInv(w, M); + int64_t nInv = modInv(N, M); + + _ntt(in, wInv); + for(int i=0; i<size(in); i++){ + coeff[i] = (in[i] * nInv) % M; + } + pOut.setCoeffs(coeff); + + return pOut; +} \ No newline at end of file diff --git a/cuda/ntt.h b/cuda/ntt.h new file mode 100644 index 0000000000000000000000000000000000000000..5078f1f82c2edb248ad84898dd70fc22fb95d45c --- /dev/null +++ b/cuda/ntt.h @@ -0,0 +1,27 @@ +#include "polynomial.h" +#include <iostream> +#include <vector> + +using namespace std; + +#define size(c) ((int)(c).size()) + +#ifndef NTT_H +#define NTT_H + +class NTT{ + public: + int M = 2013265921; + int64_t modExp(int64_t base, int64_t power, int64_t mod); + int64_t modInv(int64_t x, int64_t mod); + int bitLength(int x); + void reverse(vector<int64_t> &in, int bitLen); + bool existSmallerN(int r, int mod, int n); + int64_t genNthRoot(int mod, int n); + void _ntt(vector<int64_t> &in, int64_t w); + vector<int64_t> ntt(Polynomial in, int degree, int64_t w); + Polynomial intt(vector<int64_t> &in, int w); +}; + + +#endif \ No newline at end of file diff --git a/cuda/parallel b/cuda/parallel new file mode 100755 index 0000000000000000000000000000000000000000..05253c6f0005e9a40ed5142e70b69c188422d14f Binary files /dev/null and b/cuda/parallel differ diff --git a/cuda/parallel.cu b/cuda/parallel.cu new file mode 100644 index 0000000000000000000000000000000000000000..29d587b9e64c28b0b32d26a4729b96d8ecb586f0 --- /dev/null +++ b/cuda/parallel.cu @@ -0,0 +1,179 @@ +#include <iostream> +#include <complex> +#include <cuComplex.h> +#include <cmath> +#include <cuda_runtime.h> +#include <curand.h> +#include <chrono> +#include <curand_kernel.h> +#include "cublas_v2.h" +#include "parallel.h" + +using namespace std; + +using namespace std::chrono; + + +__device__ cuDoubleComplex power(cuDoubleComplex base, int power){ + cuDoubleComplex res = make_cuDoubleComplex(1.0, 0.0); + while(power > 0){ + if (power & 1){ + res = cuCmul(res, base); + } + + power = power >> 1; + base = cuCmul(base, base); + } + return res; +} + +__global__ void initVandermonde(cuDoubleComplex *vand, int M, cuDoubleComplex xi){ + int ROW = blockIdx.y*blockDim.y+threadIdx.y; + int COL = blockIdx.x*blockDim.x+threadIdx.x; + int exp = (2*ROW+1)*COL; + vand[ROW * M + COL] = power(xi, exp); + if(ROW == 0 && COL == 0) printf("test = %f\n", cuCreal(power(xi, exp))); +} + +__global__ void transpose(cuDoubleComplex *dest, cuDoubleComplex *source, int N){ + int ROW = blockIdx.y*blockDim.y+threadIdx.y; + int COL = blockIdx.x*blockDim.x+threadIdx.x; + + if (ROW < N && COL < N){ + int idxIn = ROW * N + COL; + int idxOut = COL * N + ROW; + dest[idxOut] = source[idxIn]; + } +} + +__global__ void expScale(cuDoubleComplex *target, int N, double scale){ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + target[idx].x = target[idx].x * scale; + target[idx].y = target[idx].y * scale; + target[N-idx-1] = cuConj(target[idx]); +} + +__device__ cuDoubleComplex vdotc(cuDoubleComplex *a, cuDoubleComplex *b, int inca, int incb, int N){ + cuDoubleComplex res = make_cuDoubleComplex(0.0, 0.0); + for(int i=0; i<N; i++){ + res = cuCadd(res, cuCmul(a[i+inca], cuConj(b[i+incb]))); + } + return res; +} + +__global__ void computeCoord(double *res, cuDoubleComplex *target, cuDoubleComplex *vand, int M){ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + double temp = cuCreal(vdotc(vand, target, idx*M, 0, M))/cuCreal(vdotc(vand, vand, idx*M, idx*M, M)); + res[idx] = temp; +} + +__global__ void setup_kernel(curandState *state){ + int idx = threadIdx.x+blockDim.x*blockIdx.x; + curand_init(1234, idx, 0, &state[idx]); +} + +__global__ void coordWRR(double *target){ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + target[idx] = (double) round(target[idx]); +} + +__global__ void unscale(double *target, double scale){ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + target[idx] = target[idx]/scale; +} + + +__global__ void sigma(double *pol, cuDoubleComplex *vand, int M, cuDoubleComplex *target){ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + target[idx] = make_cuDoubleComplex(0.0, 0.0); + for(int i=0; i<M; i++){ + target[idx].x += vand[idx*M+i].x * pol[i]; + target[idx].y += vand[idx*M+i].y * pol[i]; + } +} + +Encoder::Encoder(int Min, double s){ + M = Min; + scale = s; + root = exp((2*M_PI/M) * J); + cuDoubleComplex xi = make_cuDoubleComplex(real(root), imag(root)); + + int N = M/2; + cudaMalloc((void**)&vand, N*N*sizeof(cuDoubleComplex)); + dim3 threadsPerBlock(N, N); + dim3 blocksPerGrid(1, 1); + + if (N*N > 512){ + threadsPerBlock.x = 16; + threadsPerBlock.y = 16; + blocksPerGrid.x = ceil(double(N)/double(threadsPerBlock.x)); + blocksPerGrid.y = ceil(double(N)/double(threadsPerBlock.y)); + } + + initVandermonde<<<blocksPerGrid, threadsPerBlock>>>(vand, M/2, xi); + cudaDeviceSynchronize(); + + dcomplex *test = (dcomplex *) malloc(64*64*sizeof(dcomplex)); + cudaMemcpy(test, vand, 64*64*sizeof(dcomplex), cudaMemcpyDeviceToHost); + + cudaMalloc((void**)&sigmaRBasis, N*N*sizeof(cuDoubleComplex)); + transpose<<<blocksPerGrid, threadsPerBlock>>>(sigmaRBasis, vand, M/2); +} + +Polynomial Encoder::encode(dcomparr input){ + cuDoubleComplex *target; + size_t size = M/2; + Polynomial out(M/2); + + cudaMalloc((void**) &target, size * sizeof(cuDoubleComplex)); + cudaMemcpy(target, input.arr, M/4 * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice); + + int block = 1; + int threadPerBlock = M/2; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(M/2)/1024.0); + } + + auto start = high_resolution_clock::now(); + expScale<<<block,threadPerBlock/2>>>(target, M/2, scale); + + double *res; + cudaMalloc((void**)&res, M/2*sizeof(double)); + computeCoord<<<block,threadPerBlock>>>(res, target, sigmaRBasis, M/2); + + coordWRR<<<block,threadPerBlock>>>(res); + + out.setCoeffs(res); + auto stop = high_resolution_clock::now(); + auto duration = duration_cast<microseconds>(stop - start); + cout << "Encoding: " << duration.count() << endl; + return out; +} + +dcomparr Encoder::decode(Polynomial pt){ + auto start = high_resolution_clock::now(); + + int block = 1; + int threadPerBlock = M/2; + if(threadPerBlock > 1024){ + threadPerBlock = 1024; + block = ceil((double)(M/2)/1024.0); + } + + unscale<<<block,threadPerBlock>>>(pt.coeffs, scale); + + cuDoubleComplex *target; + cudaMalloc((void**)&target, M/4 * sizeof(cuDoubleComplex)); + sigma<<<block,threadPerBlock/2>>>(pt.coeffs, vand, M/2, target); + + dcomparr output(M/4); + size_t size_out = (M/4)*sizeof(dcomplex); + cudaMemcpy(output.arr, target, size_out, cudaMemcpyDeviceToHost); + + auto stop = high_resolution_clock::now(); + auto duration = duration_cast<microseconds>(stop - start); + cout << "Decoding: " << duration.count() << endl; + return output; +} + diff --git a/cuda/parallel.h b/cuda/parallel.h new file mode 100644 index 0000000000000000000000000000000000000000..2ef3eb634df2a134e3bca97eb2ca17a16373fd1b --- /dev/null +++ b/cuda/parallel.h @@ -0,0 +1,36 @@ +#include <complex> +#include <cuComplex.h> +#include "polynomial.h" + +using namespace std; + +#define J dcomplex(0.0,1.0) +#define I make_cuDoubleComplex(0.0, 1.0) +#define SIZE 10000 + +typedef complex<double> dcomplex; + +struct dcomparr { + dcomplex* arr; + dcomparr(int size){ + arr = (dcomplex*) malloc(size*sizeof(dcomplex)); + } +}; + +#ifndef PARALLEL_H +#define PARALLEL_H + +class Encoder{ + public: + int M; + double scale; + dcomplex root; + cuDoubleComplex *vand; + cuDoubleComplex *sigmaRBasis; + + Encoder(int M, double scale); + Polynomial encode(dcomparr input); + dcomparr decode(Polynomial pt); +}; + +#endif \ No newline at end of file diff --git a/cuda/polynomial.cu b/cuda/polynomial.cu new file mode 100644 index 0000000000000000000000000000000000000000..8f5944cddeb19130950fb94caabe2bb79b0bd82b --- /dev/null +++ b/cuda/polynomial.cu @@ -0,0 +1,127 @@ +#include <complex> +#include <cmath> +#include <random> +#include <algorithm> +#include <string.h> +#include <iostream> +#include "polynomial.h" + +using namespace std; + + +__global__ void polyAdd(double* a, int aDeg, double* b, int bDeg, double *res){ + int idx = threadIdx.x; + res[idx] = 0.0; + if(idx < aDeg) res[idx] += a[idx]; + if(idx < bDeg) res[idx] += b[idx]; +} + +__global__ void polyMod(double *res, double *in, int mod){ + int idx = threadIdx.x; + res[idx] = (double) ((int) in[idx] % mod); +} + +__global__ void polyScale(double *res, double *in, double scale){ + int idx = threadIdx.x; + res[idx] = in[idx] * scale; +} + +__global__ void polyRoundScale(double *res, double *in, double scale){ + int idx = threadIdx.x; + res[idx] = (double) round(in[idx] * scale); +} + +__global__ void polyMult(double *res, double *a, double *b, int deg){ + int idx = threadIdx.x; + res[idx] = 0; + for(int i=0; i<deg; i++){ + for(int j=0; j<deg; j++){ + if(j+i == idx){ + res[idx] += a[i] * b[j]; + } + } + } +} + +Polynomial::Polynomial(int deg){ + degree = deg; + cudaMalloc((void**)&coeffs, degree*sizeof(double)); +} + +Polynomial::Polynomial(int deg, double* coeff){ + degree = deg; + cudaMalloc((void**)&coeffs, degree*sizeof(double)); + cudaMemcpy(coeffs, coeff, degree*sizeof(double), cudaMemcpyHostToDevice); +} + +void Polynomial::setCoeffs(double *coeff){ + cudaMemcpy(coeffs, coeff, degree*sizeof(double), cudaMemcpyDeviceToDevice); +} + +Polynomial Polynomial::operator + (Polynomial const &obj){ + int deg = max(obj.degree, degree); + double* resCoeff; + cudaMalloc((void**)&resCoeff, deg*sizeof(double)); + polyAdd<<<1,deg>>>(coeffs, degree, obj.coeffs, obj.degree, resCoeff); + + Polynomial out(deg); + out.setCoeffs(resCoeff); + return out; +} + +Polynomial Polynomial::operator * (Polynomial const &obj){ + int deg = obj.degree + degree; + double *resCoeff; + cudaMalloc((void**)&resCoeff, deg*sizeof(double)); + polyMult<<<1, deg>>>(resCoeff, coeffs, obj.coeffs, deg); + Polynomial out(deg); + out.setCoeffs(resCoeff); + return out; +} + +void Polynomial::setDegree(int deg){ + degree = deg; +} + +Polynomial Polynomial::modCoeff(int q){ + Polynomial out(degree); + double *resCoeff; + cudaMalloc((void**)&resCoeff, degree*sizeof(double)); + polyMod<<<1, degree>>>(resCoeff, coeffs, q); + out.setCoeffs(resCoeff); + return out; +} + +Polynomial Polynomial::scaleCoeff(double scale){ + Polynomial out(degree); + double *resCoeff; + cudaMalloc((void**)&resCoeff, degree*sizeof(double)); + polyScale<<<1, degree>>>(resCoeff, coeffs, scale); + out.setCoeffs(resCoeff); + return out; +} + +Polynomial Polynomial::scaleRoundCoeff(double scale){ + Polynomial out(degree); + double *resCoeff; + cudaMalloc((void**)&resCoeff, degree*sizeof(double)); + polyRoundScale<<<1, degree>>>(resCoeff, coeffs, scale); + out.setCoeffs(resCoeff); + return out; +} + +void Polynomial::printPol(){ + double *temp = (double*) malloc(degree * sizeof(double)); + cudaMemcpy(temp, coeffs, degree * sizeof(double), cudaMemcpyDeviceToHost); + for(int i=0; i<degree; i++){ + cout << temp[i]; + if(i != 0){ + cout << " x^" << i; + } + + if (i != degree-1){ + cout << " + "; + } + } + cout << endl; +} diff --git a/cuda/polynomial.h b/cuda/polynomial.h new file mode 100644 index 0000000000000000000000000000000000000000..98f3882683aad99b1c404d547665ea34257bfdb3 --- /dev/null +++ b/cuda/polynomial.h @@ -0,0 +1,26 @@ +#include<iostream> + +#ifndef POLYNOMIAL_H +#define POLYNOMIAL_H + +class Polynomial { + public: + int degree; + double* coeffs; + + Polynomial() = default; + Polynomial(int deg); + Polynomial(int deg, double* coeff); + + void setCoeffs(double* coeff); + void setDegree(int deg); + Polynomial modCoeff(int q); + Polynomial scaleCoeff(double scale); + Polynomial scaleRoundCoeff(double scale); + Polynomial operator + (Polynomial const &obj); + Polynomial operator * (Polynomial const &obj); + void printPol(); +}; + + +#endif \ No newline at end of file diff --git a/cuda/pubkey.cu b/cuda/pubkey.cu new file mode 100644 index 0000000000000000000000000000000000000000..4c1fb7bd951208c6c7abd0a534aac93ac4c298fe --- /dev/null +++ b/cuda/pubkey.cu @@ -0,0 +1,52 @@ +#include<iostream> +#include <complex> +#include <cmath> +#include <random> +#include "pubkey.h" + +using namespace std; + +PubKey::PubKey(Polynomial _s, int degree, int64_t q){ + a = Polynomial(degree); + b = Polynomial(degree); + + generateA(4, 100); + computeB(q, _s); +} + +Polynomial PubKey::genE(int degree, double var){ + double errCoeffs[degree]; + + random_device rd; + mt19937 gen(rd()); + normal_distribution<double> dis(0, var); + + for(int i=0; i<degree; i++){ + errCoeffs[i] = (double) round(dis(gen)); + } + + Polynomial err(degree, errCoeffs); + return err; +} + +void PubKey::generateA(int degree, int64_t q){ + int64_t half_q = q/2; + random_device rd; + mt19937 gen(rd()); + uniform_int_distribution<int64_t> dis(-half_q, 0); + double a_coeffs[degree]; + + for (int i=0; i<degree; i++){ + a_coeffs[i] = (double) dis(gen); + } + a.setCoeffs(a_coeffs); +} + +void PubKey::computeB(int q, Polynomial s){ + Polynomial err = genE(a.degree, 1.0); + Polynomial temp = (a.scaleCoeff(-1.0)) * s + err; + Polynomial res = temp.modCoeff(q); + + b.setDegree(res.degree); + b.setCoeffs(res.coeffs); +} diff --git a/cuda/pubkey.h b/cuda/pubkey.h new file mode 100644 index 0000000000000000000000000000000000000000..0958da4d274f216e977f729d26403a71a1c31020 --- /dev/null +++ b/cuda/pubkey.h @@ -0,0 +1,18 @@ +#include "polynomial.h" + +#ifndef PUBKEY_H +#define PUBKEY_H + +class PubKey{ + public: + Polynomial a; + Polynomial b; + + PubKey() = default; + PubKey(Polynomial s, int degree, int64_t q); + Polynomial genE(int degree, double var); + void generateA(int degree, int64_t q); + void computeB(int q, Polynomial s); +}; + +#endif \ No newline at end of file diff --git a/cuda/run b/cuda/run new file mode 100755 index 0000000000000000000000000000000000000000..20804cb980d4bb16791febdebd25492dfe56c61e Binary files /dev/null and b/cuda/run differ diff --git a/cuda/run.sh b/cuda/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..06ce13796cf74e4ebff1061bb82a4ca50de95c3c --- /dev/null +++ b/cuda/run.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +./compile.sh +nvcc -c -o test.o test.cu +nvcc -o run test.o ./bin/ciphertext.o ./bin/ckks.o ./bin/parallel.o ./bin/polynomial.o ./bin/pubkey.o ./bin/seckey.o ./bin/evalkey.o +./run \ No newline at end of file diff --git a/cuda/seckey.cu b/cuda/seckey.cu new file mode 100644 index 0000000000000000000000000000000000000000..d0efd3915d2130c7dc3c2ea2a5065ce2c084e7b3 --- /dev/null +++ b/cuda/seckey.cu @@ -0,0 +1,29 @@ +#include <random> +#include <string> +#include <algorithm> +#include <random> +#include "seckey.h" + +using namespace std; + +SecKey::SecKey(int deg){ + double coeff[deg]; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distr(deg/2, deg); + + int h = distr(gen); + string ones(h, '1'); + string zeros((deg-h), '0'); + + string key = ones + zeros; + random_shuffle(key.begin(), key.end()); + + for(int i=0; i<deg; i++){ + char temp = key.at(i); + coeff[i] = (double) (atoi(&temp)); + } + + s = Polynomial(deg, coeff); +} \ No newline at end of file diff --git a/cuda/seckey.h b/cuda/seckey.h new file mode 100644 index 0000000000000000000000000000000000000000..4fa51cc835de9e1d2986b668fdd8ab3b279136de --- /dev/null +++ b/cuda/seckey.h @@ -0,0 +1,15 @@ +#include "polynomial.h" +#include <string> + +#ifndef SECKEY_H +#define SECKEY_H + + +class SecKey{ + public: + Polynomial s; + SecKey() = default; + SecKey(int deg); +}; + +#endif \ No newline at end of file diff --git a/cuda/test.cu b/cuda/test.cu new file mode 100644 index 0000000000000000000000000000000000000000..c9d4cc1e7e4131a3a989663395bf79409c788b92 --- /dev/null +++ b/cuda/test.cu @@ -0,0 +1,170 @@ +#include <iostream> +#include <chrono> + +#include "polynomial.h" +#include "parallel.h" +#include "ckks.h" +#include "pubkey.h" +#include "seckey.h" +#include "evalkey.h" + + +using namespace std; + +using namespace std::chrono; + +dcomparr randomFill(int n){ + dcomparr out(n); + for(int i=0; i<n; i++){ + out.arr[i] = 1.0 + 1.0*J; + } + return out; +} + +int main(){ + // Encoder enc(128, 128.0); + // dcomplex a = 1.0 + 1.0 *J; + // dcomparr input; + // for(int i=0; i<32; i++){ + // input.arr[i] = a; + // } + + // Encoder enc(8, 64.0); + // dcomplex a1 = 3.0 + 4.0 *J; + // dcomplex a2 = 2.0 - 1.0*J; + // dcomparr input(2); + // input.arr[0] = a1; + // input.arr[1] = a2; + + // Polynomial pt = enc.encode(input); + // pt.printPol(); + + int q0 = 67; + int pl[6] = {61, 67, 71, 73, 79, 59}; + int64_t ql = q0; + + for (int i=0; i<3; i++){ + ql *= pl[i]; + } + + // SecKey sk = SecKey(4); + // PubKey pk(sk.s, 4, ql); + // EvalKey evk(sk.s, 4, ql); + + // cout << "decode" << endl; + // dcomparr out = enc.decode(pt); + // for(int i=0; i<2; i++){ + // cout << out.arr[i] << " "; + // } + // cout << endl; + + bool error = false; + // for(int i=1; i<5; i++){ + // cout << i << endl; + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + // dcomparr in = randomFill(N); + // Polynomial test = enc.encode(in); + // dcomparr out = enc.decode(test); + // for(int j=0; j<N; j++){ + // if (abs(in.arr[i]-out.arr[i]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + // for(int i=1; i<5; i++){ + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + + // SecKey sk = SecKey(N); + // PubKey pk(sk.s, (N)+1, ql); + // EvalKey evk(sk.s, (N)+1, ql); + // CKKS ckks(N, pk, evk, sk); + + // dcomparr in = randomFill(N); + + // Polynomial test = enc.encode(in); + // Ciphertext ct = ckks.encrypt(test); + // Polynomial testOut = ckks.decrypt(ct); + + // dcomparr out = enc.decode(testOut); + // for(int j=0; j<N; j++){ + // if (abs(in.arr[j]-out.arr[j]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + // for(int i=1; i<5; i++){ + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + + // SecKey sk = SecKey(N); + // PubKey pk(sk.s, (N)+1, ql); + // EvalKey evk(sk.s, (N)+1, ql); + // CKKS ckks(N, pk, evk, sk); + + // dcomparr in = randomFill(N); + + // Polynomial test = enc.encode(in); + // Ciphertext ct = ckks.encrypt(test); + // Ciphertext ctadd = ckks.add(ct,ct); + // Polynomial testOut = ckks.decrypt(ctadd); + + // dcomparr out = enc.decode(testOut); + // dcomplex scale(2.0, 0.0); + // for(int j=0; j<N; j++){ + // if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + // for(int i=1; i<5; i++){ + // int N = pow(2, i); + // Encoder enc(N*4, 64.0); + + // SecKey sk = SecKey(N); + // PubKey pk(sk.s, (N)+1, ql); + // EvalKey evk(sk.s, (N)+1, ql); + // CKKS ckks(N, pk, evk, sk); + + // dcomparr in = randomFill(N); + + // Polynomial test = enc.encode(in); + // Ciphertext ct = ckks.encrypt(test); + // Ciphertext ctadd = ckks.mult(ct,ct); + // Polynomial testOut = ckks.decrypt(ctadd); + + // Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0); + // dcomparr out = enc.decode(testOut); + // dcomparr check = enc.decode(control); + + // for(int j=0; j<N*2; j++){ + // if (abs(check.arr[j]-out.arr[j]) > 2.0){ + // error = true; + // break; + // } + // } + // if (error) break; + // } + + + if(error) { + cout << "Error" << endl; + } else { + cout << "Finish" << endl; + } + + return 0; + + + +} \ No newline at end of file diff --git a/cuda/test.o b/cuda/test.o new file mode 100644 index 0000000000000000000000000000000000000000..296bb9efce91360217237fc9e23e287a11053363 Binary files /dev/null and b/cuda/test.o differ diff --git a/cuda/tester b/cuda/tester new file mode 100755 index 0000000000000000000000000000000000000000..f7055a3ef821cb040ae63d7f2b5d2311584f23c4 Binary files /dev/null and b/cuda/tester differ