diff --git a/bin/ckks.o b/bin/ckks.o
index c93f7ee3db6f082c7cb6c57ddf374675c8b9ffee..594a5b6d1c5d60907aa708134abdc63f547f62ad 100644
Binary files a/bin/ckks.o and b/bin/ckks.o differ
diff --git a/bin/encoder.o b/bin/encoder.o
index 90b7bf07128ce858c5729f29d08ed53347b5bf6b..2d66894c23caf0e68bae302f3dbb767e7abab111 100644
Binary files a/bin/encoder.o and b/bin/encoder.o differ
diff --git a/bin/ntt.o b/bin/ntt.o
index 2e280a44185091b194bf75c23a5bab6b053ee3de..29792475d4283efe7e25daa09b58554e1707f568 100644
Binary files a/bin/ntt.o and b/bin/ntt.o differ
diff --git a/bin/polynomial.o b/bin/polynomial.o
index d7e200b09999cf80452369cba013fc080e75f787..72a138d62baf80eaed622d548cae19dfd03e8eda 100644
Binary files a/bin/polynomial.o and b/bin/polynomial.o differ
diff --git a/bin/pubkey.o b/bin/pubkey.o
index b9353297bfbbfcc9ba00ad6778cb7c491e470c0a..a467481d2e50837ba142aac2e3e2467425a3be00 100644
Binary files a/bin/pubkey.o and b/bin/pubkey.o differ
diff --git a/cuda/bin/ciphertext.o b/cuda/bin/ciphertext.o
index 5189234927a1d5bb60dcd78fdb25d7e6166b3cfd..1580a51b7f93fb54a277011c86dcc99d618d4511 100644
Binary files a/cuda/bin/ciphertext.o and b/cuda/bin/ciphertext.o differ
diff --git a/cuda/bin/ckks.o b/cuda/bin/ckks.o
index d9489959a078a0d0bb0cb622434126ffc14b2fe6..9f27a6dc1bbf9649000510f97701b3f18856fee6 100644
Binary files a/cuda/bin/ckks.o and b/cuda/bin/ckks.o differ
diff --git a/cuda/bin/evalkey.o b/cuda/bin/evalkey.o
index a04a147bcd6515ab49919a770228ea700f2ecc86..0a421c59846a55e67e31b07ed0a2dd8355212af2 100644
Binary files a/cuda/bin/evalkey.o and b/cuda/bin/evalkey.o differ
diff --git a/cuda/bin/ntt.o b/cuda/bin/ntt.o
new file mode 100644
index 0000000000000000000000000000000000000000..d27e85ab1b9dc1e1cf10aa98ac7e9aa7c9406a1a
Binary files /dev/null and b/cuda/bin/ntt.o differ
diff --git a/cuda/bin/parallel.o b/cuda/bin/parallel.o
index 0901e941e6dfc2ee2d3e62e149e0a842c6be92a6..234ae9b2240064456b6c9238a3dc960e0723571d 100644
Binary files a/cuda/bin/parallel.o and b/cuda/bin/parallel.o differ
diff --git a/cuda/bin/polynomial.o b/cuda/bin/polynomial.o
index e4cf05596eee04751b84815c8cce5ef7ec5427eb..3f3e4a8de9a7f0237b2050621639ae91ddb7fc2e 100644
Binary files a/cuda/bin/polynomial.o and b/cuda/bin/polynomial.o differ
diff --git a/cuda/bin/pubkey.o b/cuda/bin/pubkey.o
index ec6254a2069a800328fa0d7453c6c0a4d97c9bdf..c85cfc0bae7cff1d80e94b823755b80e80e88cb4 100644
Binary files a/cuda/bin/pubkey.o and b/cuda/bin/pubkey.o differ
diff --git a/cuda/bin/seckey.o b/cuda/bin/seckey.o
index a1c6ec9d4839487bf2929a22e8413a9ef22409e9..dececb994b5bcc125b327f05e1c465d451ed0c45 100644
Binary files a/cuda/bin/seckey.o and b/cuda/bin/seckey.o differ
diff --git a/cuda/compile.sh b/cuda/compile.sh
index 5ab0cdbcef0bc4c0fc02366bb512b7fc347a5383..a96cf95db77a5cd3e69ef9511f6a9e83112dab01 100755
--- a/cuda/compile.sh
+++ b/cuda/compile.sh
@@ -6,4 +6,5 @@ nvcc -c -o ./bin/parallel.o parallel.cu
 nvcc -c -o ./bin/polynomial.o polynomial.cu
 nvcc -c -o ./bin/pubkey.o pubkey.cu
 nvcc -c -o ./bin/seckey.o seckey.cu
-nvcc -c -o ./bin/evalkey.o evalkey.cu
\ No newline at end of file
+nvcc -c -o ./bin/evalkey.o evalkey.cu
+nvcc -c -o ./bin/ntt.o ntt.cu
\ No newline at end of file
diff --git a/cuda/evalkey.cu b/cuda/evalkey.cu
index 6c886a601b73311daf883854f9d3cc8d5b01d2a4..c2fbe958b78cf9a526b7ceeb571bd9660881c4f2 100644
--- a/cuda/evalkey.cu
+++ b/cuda/evalkey.cu
@@ -7,11 +7,9 @@
 using namespace std;
 
 EvalKey::EvalKey(Polynomial _s, int degree, int64_t q){
-    a = Polynomial(degree);
-    b = Polynomial(degree);
     p = 1000;
 
-    generateA(4, 1000);
+    generateA(degree, 1000);
     computeB(q, _s);
 }
 
@@ -40,7 +38,8 @@ void EvalKey::generateA(int degree, int64_t q){
     for (int i=0; i<degree; i++){
         a_coeffs[i] = (double) dis(gen);
     }
-    a.setCoeffs(a_coeffs);
+
+    a = Polynomial(degree, a_coeffs);
 }
 
 void EvalKey::computeB(int q, Polynomial s){
@@ -49,6 +48,6 @@ void EvalKey::computeB(int q, Polynomial s){
     Polynomial temp = ((a.scaleCoeff(-1.0)) * s) + ev + err;
     Polynomial res = temp.modCoeff(q * p);
 
-    b.setDegree(res.degree);
+    b = Polynomial(res.degree);
     b.setCoeffs(res.coeffs);
 }
\ No newline at end of file
diff --git a/cuda/ntt.cu b/cuda/ntt.cu
index 1d2ee8159869cedcc9ad7fe19a2df1a4a5c030a2..b45b53496c9a26ad19908c5d9dfa7f1d38a141aa 100644
--- a/cuda/ntt.cu
+++ b/cuda/ntt.cu
@@ -1,5 +1,43 @@
 #include "ntt.h"
 
+// Modular exponentiation by repeated squaring: returns base^power reduced with `% mod`.
+// A power <= 0 skips the loop and yields 1. Products are formed in int64_t, so
+// NOTE(review): mod presumably has to stay below about 2^31.5 to avoid overflow.
+__device__ int64_t devModExp(int64_t base, int64_t power, int64_t mod){
+    int64_t acc = 1;
+    int64_t factor = base % mod;
+    for (int64_t e = power; e > 0; e >>= 1){
+        if (e & 1){
+            acc = (acc * factor) % mod;
+        }
+        factor = (factor * factor) % mod;
+    }
+    return acc;
+}
+
+// Extended Euclid on (mod, x): returns the Bezout coefficient of x, shifted by +mod when
+// negative. When gcd(x, mod) == 1 that value is the inverse of x modulo mod.
+// NOTE(review): x == 0 or a non-coprime x is not detected; the coefficient reached when the
+// remainder hits 0 is returned as-is.
+__device__ int64_t devModInv(int64_t x, int64_t mod){
+    int64_t prevCoef = 0;
+    int64_t curCoef = 1;
+    int64_t prevRem = mod;
+    int64_t curRem = x;
+
+    while (curRem != 0){
+        const int64_t q = prevRem / curRem;
+        const int64_t nextCoef = prevCoef - q * curCoef;
+        const int64_t nextRem = prevRem % curRem;
+        prevCoef = curCoef;
+        curCoef = nextCoef;
+        prevRem = curRem;
+        curRem = nextRem;
+    }
+
+    return (prevCoef < 0) ? prevCoef + mod : prevCoef;
+}
+
 int64_t NTT::modExp(int64_t base, int64_t power, int64_t mod){
     int64_t res = 1;
     int64_t p = power;
@@ -88,57 +126,78 @@ void NTT::reverse(vector<int64_t> &in, int bitLen){
     }
 }
 
+// Stage-i butterfly: thread j folds poly[2j], poly[2j+1] into p1[j] and p1[j+N/2] (mod M).
+__global__ void computeNTT(int64_t* p1, int N, int i, int64_t w, int nBit, int64_t *poly){
+    const int64_t M = 2013265921;  // NOTE(review): hard-coded modulus; keep in sync with the host side
+    const int j = threadIdx.x + blockIdx.x*blockDim.x;
+    if (j >= N/2) return;  // only N/2 butterflies exist; surplus threads must not touch memory
+    const int shift = nBit - i - 1;
+    // Twiddle exponent: j with its low `shift` bits cleared.
+    const int64_t P = ((int64_t)j >> shift) << shift;
+    const int64_t odd = poly[2*j+1] * devModExp(w, P, M);
+    const int64_t even = poly[2*j];
+    p1[j] = (even + odd) % M;
+    p1[j+N/2] = (even - odd) % M;  // may be negative; the host runs correctNeg after the last stage
+}
+
+__global__ void correctNeg(int64_t* points, int M){  // one thread per element, no bounds guard
+    const int k = blockIdx.x*blockDim.x + threadIdx.x;
+    if (points[k] < 0) points[k] += M;  // lift a negative entry by M once
+}
+
 void NTT::_ntt(vector<int64_t> &in, int64_t w){
     int N = size(in);
     int nBit = bitLength(N) - 1;
     reverse(in, nBit);
 
-    vector<int> points(N, 0);
+    // Runs the nBit butterfly stages on the GPU and writes the result back into `in`.
+    // Two device buffers are allocated once and used alternately: stage i reads
+    // buf[i&1] and writes buf[(i+1)&1], so no per-stage cudaMalloc or device-to-device
+    // copy is needed and both buffers are released before returning.
+    // NOTE(review): assumes N is a power of two (the launch sizes below divide exactly);
+    // CUDA return codes are still not checked here.
+    if(N < 1) return;
+
+    const size_t bytes = N*sizeof(int64_t);
+    int64_t* buf[2] = {nullptr, nullptr};
+    cudaMalloc((void**)&buf[0], bytes);
+    cudaMalloc((void**)&buf[1], bytes);
+    cudaMemcpy(buf[0], in.data(), bytes, cudaMemcpyHostToDevice);
+
     for(int i=0; i<nBit; i++){
-        vector<int64_t> p1;
-        vector<int64_t> p2;
-        for(int j=0; j<N/2; j++){
-            int shift = nBit - i - 1;
-            int64_t P = (j >> shift) << shift;
-            int64_t wP = modExp(w, P, M);
-            int64_t odd = in[2*j+1] * wP;
-            int64_t even = in[2*j];
-            p1.push_back((even + odd) % M);
-            p2.push_back((even - odd) % M);
-        }
-
-        for(int k=0; k<N/2; k++){
-            points[k] = p1[k];
-            points[k+N/2] = p2[k];
-        }
-
-        if(1!=nBit){
-            for(int k=0; k<N; k++){
-                in[k] = points[k];
-            }
-        }
+        // One thread per butterfly, at most 256 per block so N/2 may exceed the block limit.
+        const int threads = (N/2 < 256) ? N/2 : 256;
+        computeNTT<<<(N/2)/threads, threads>>>(buf[(i+1)&1], N, i, w, nBit, buf[i&1]);
     }
 
-    for(int k=0; k<N; k++){
-        in[k] = points[k];
-        if(in[k] < 0){
-            in[k] += M;
-        }
-    }
+    // The newest data is in buf[nBit&1] (the unmodified input when nBit == 0).
+    int64_t* result = buf[nBit&1];
+    const int threads = (N < 256) ? N : 256;
+    correctNeg<<<N/threads, threads>>>(result, 2013265921);
+    // Blocking copy on the default stream, so it runs after the kernels launched above.
+    cudaMemcpy(in.data(), result, bytes, cudaMemcpyDeviceToHost);
+
+    // Release both device buffers.
+    cudaFree(buf[0]);
+    cudaFree(buf[1]);
 }
 
 vector<int64_t> NTT::ntt(Polynomial in, int degree, int64_t w){
     vector<int64_t> out(degree,0);
-    for(int i=0; i<in.degree; i++){
-        out[i] = (int64_t) in.coeffs[i];
+    // Stage the coefficients on the host; the DeviceToHost copy implies in.coeffs is device memory.
+    vector<double> host(in.degree);
+    cudaMemcpy(host.data(), in.coeffs, in.degree*sizeof(double), cudaMemcpyDeviceToHost);
+    // Only the first `degree` coefficients fit in `out`; any extra ones are ignored.
+    for(int i=0; i<in.degree && i<degree; i++){
+        out[i] = (int64_t) host[i];
     }
+    
     _ntt(out, w);
     return out;
 }
 
 Polynomial NTT::intt(vector<int64_t> &in, int w){
     int N = size(in);
-    Polynomial pOut(N);
     double* coeff = (double*) malloc(N*sizeof(double));
 
     int64_t wInv = modInv(w, M);
@@ -148,7 +207,8 @@ Polynomial NTT::intt(vector<int64_t> &in, int w){
     for(int i=0; i<size(in); i++){
         coeff[i] = (in[i] * nInv) % M;
     }
-    pOut.setCoeffs(coeff);
+
+    Polynomial pOut(N, coeff);
     
     return pOut;
 }
\ No newline at end of file
diff --git a/cuda/pubkey.cu b/cuda/pubkey.cu
index 4c1fb7bd951208c6c7abd0a534aac93ac4c298fe..9e9dbab090d5adcfa98b2332e63f9bcb37821bf4 100644
--- a/cuda/pubkey.cu
+++ b/cuda/pubkey.cu
@@ -7,10 +7,7 @@
 using namespace std;
 
 PubKey::PubKey(Polynomial _s, int degree, int64_t q){
-    a = Polynomial(degree);
-    b = Polynomial(degree);
-
-    generateA(4, 100);
+    generateA(degree, 1000);
     computeB(q, _s);
 }
 
@@ -39,7 +36,7 @@ void PubKey::generateA(int degree, int64_t q){
     for (int i=0; i<degree; i++){
         a_coeffs[i] = (double) dis(gen);
     }
-    a.setCoeffs(a_coeffs);
+    a = Polynomial(degree, a_coeffs);
 }
 
 void PubKey::computeB(int q, Polynomial s){
@@ -47,6 +44,6 @@ void PubKey::computeB(int q, Polynomial s){
     Polynomial temp = (a.scaleCoeff(-1.0)) * s + err;
     Polynomial res = temp.modCoeff(q);
 
-    b.setDegree(res.degree);
+    b = Polynomial(res.degree);
     b.setCoeffs(res.coeffs);
 }
diff --git a/cuda/run b/cuda/run
index 20804cb980d4bb16791febdebd25492dfe56c61e..63310b5b9d6a41736086824680060af5cae67974 100755
Binary files a/cuda/run and b/cuda/run differ
diff --git a/cuda/run.sh b/cuda/run.sh
index 06ce13796cf74e4ebff1061bb82a4ca50de95c3c..baa92c7cf3225da494f816a26a5e2b752e610b59 100755
--- a/cuda/run.sh
+++ b/cuda/run.sh
@@ -2,5 +2,5 @@
 
 ./compile.sh
 nvcc -c -o test.o test.cu 
-nvcc -o run test.o ./bin/ciphertext.o ./bin/ckks.o ./bin/parallel.o ./bin/polynomial.o ./bin/pubkey.o ./bin/seckey.o ./bin/evalkey.o 
+nvcc -o run test.o ./bin/ciphertext.o ./bin/ckks.o ./bin/parallel.o ./bin/polynomial.o ./bin/pubkey.o ./bin/seckey.o ./bin/evalkey.o ./bin/ntt.o 
 ./run
\ No newline at end of file
diff --git a/cuda/test.cu b/cuda/test.cu
index c9d4cc1e7e4131a3a989663395bf79409c788b92..02c7f14708a07d30b73bca88a1ad4a20f118cea9 100644
--- a/cuda/test.cu
+++ b/cuda/test.cu
@@ -7,6 +7,7 @@
 #include "pubkey.h"
 #include "seckey.h"
 #include "evalkey.h"
+#include "ntt.h"
 
 
 using namespace std;
@@ -156,6 +157,19 @@ int main(){
     //     if (error) break;
     // }
 
+    double input[] = {2.0, 1.0};
+    Polynomial in(2, input);
+
+    NTT ntt;
+    vector<int64_t> out = ntt.ntt(in, 8, 1801542727);
+
+    for(int i=0; i<8; i++){
+        cout << out[i] << " ";
+    }
+    cout << endl;
+
+    Polynomial test = ntt.intt(out, 1801542727);
+    test.printPol();
 
     if(error) {
         cout << "Error" << endl;
diff --git a/cuda/test.o b/cuda/test.o
index 296bb9efce91360217237fc9e23e287a11053363..c7f0951f33be0ac829b0362f8519d12d225d8c76 100644
Binary files a/cuda/test.o and b/cuda/test.o differ
diff --git a/run b/run
index e52bfd1631a6ed728b52ed545bf60f12d3be10e1..56659c8159d83dbcac387195f701f5049ebd2ed3 100755
Binary files a/run and b/run differ
diff --git a/src/ckks.cpp b/src/ckks.cpp
index 08694ff057e175177c915a2c55d05f6edb6b7948..3714399e92ba451608e83190b3cc44aed63d555d 100644
--- a/src/ckks.cpp
+++ b/src/ckks.cpp
@@ -38,32 +38,22 @@ vector<int64_t> vMult(vector<int64_t> a, vector<int64_t> b){
   return out;
 }
 
+
 Polynomial CKKS::fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w){
   vector<int64_t> pol1 = ntt.ntt(p1, deg, w);
   vector<int64_t> pol2 = ntt.ntt(p2, deg, w);
+  int64_t p = 4179340454199820289;
   
   vector<int64_t> mult;
   for(int i=0; i<deg; i++){
-    mult.push_back(pol1[i] * pol2[i]);
+    mult.push_back(NTT::modMul(pol1[i], pol2[i], p));
   }
 
-  // if(deg == 16){
-  //   p1.printPol();
-  //   p2.printPol();
-  //   (ntt.intt(mult, w)).printPol();
-  //   // for(int i=0; i<deg; i++){
-  //   //     cout << mult[i] << " ";
-  //   //   }
-  //   //   cout << endl;
-  // }
-
   Polynomial out = ntt.intt(mult, w);
   return out;
 }
 
-CKKS::CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk){
-  pk = _pk;
-  sk = _sk;
+CKKS::CKKS(int N, EvalKey _evk){
   evk = _evk;
   deg = N;
   ql = q0;
@@ -102,7 +92,7 @@ Polynomial CKKS::genZO(){
   return zo;
 }
 
-Ciphertext CKKS::encrypt(Polynomial pt){
+Ciphertext CKKS::encrypt(Polynomial pt, PubKey pk){
   Polynomial e = genE(pt.degree, 1.0);
   Polynomial c0 = (pt) + pk.b;
   Polynomial c1 = pk.a;
@@ -111,7 +101,7 @@ Ciphertext CKKS::encrypt(Polynomial pt){
   return ct;
 }
 
-Polynomial CKKS::decrypt(Ciphertext ct){
+Polynomial CKKS::decrypt(Ciphertext ct, SecKey sk){
   Polynomial pt = ct.c0 + (ct.c1 * sk.s);
   return pt.modCoeff(ql);
 }
@@ -123,16 +113,16 @@ Ciphertext CKKS::add(Ciphertext ct1, Ciphertext ct2){
 
 Ciphertext CKKS::mult(Ciphertext ct1, Ciphertext ct2){
   // Multiplication
-  Polynomial d1 = (ct1.c0 * ct2.c0).modCoeff(ql);
-  Polynomial d2 = ((ct1.c0 * ct2.c1) + (ct2.c0 * ct1.c1)).modCoeff(ql); 
-  Polynomial d3 = (ct1.c1 * ct2.c1).modCoeff(ql);
+  Polynomial d1 = Polynomial::polyModMul(ct1.c0, ct2.c0, ql);
+  Polynomial d2 = (Polynomial::polyModMul(ct1.c0, ct2.c1, ql) + Polynomial::polyModMul(ct1.c1, ct2.c0, ql)).modCoeff(ql); 
+  Polynomial d3 = Polynomial::polyModMul(ct1.c1, ct2.c1, ql);
 
   // Relin 
-  Polynomial d3_0 = ((d3 * evk.b).scaleRoundCoeff(1.0/1000.0)).modCoeff(ql);
-  Polynomial d3_1 = ((d3 * evk.a).scaleRoundCoeff(1.0/1000.0)).modCoeff(ql);
+  Polynomial d3_0 = Polynomial::polyModMul(d3, evk.b, ql).scaleRoundCoeff(1.0/1000.0);
+  Polynomial d3_1 = Polynomial::polyModMul(d3, evk.a, ql).scaleRoundCoeff(1.0/1000.0);
 
-  Polynomial outC0 = d1.modCoeff(ql) + d3_0.modCoeff(ql);
-  Polynomial outC1 = d2.modCoeff(ql) + d3_1.modCoeff(ql);
+  Polynomial outC0 = (d1 + d3_0).modCoeff(ql);
+  Polynomial outC1 = (d2 + d3_1).modCoeff(ql);
 
   // Rescale
   ql = (double)ql / (double)pl[level-1];
@@ -141,7 +131,7 @@ Ciphertext CKKS::mult(Ciphertext ct1, Ciphertext ct2){
 
   level -= 1;
   
-  Ciphertext out(c0.modCoeff(ql), c1.modCoeff(ql));
+  Ciphertext out(c0, c1);
   return out;
 }
 
diff --git a/src/ckks.h b/src/ckks.h
index c81e4177c4870a21d3bbdbb9fb7fc374204c142d..eae9a6463e18f067b6981b0c592fe15a36288487 100644
--- a/src/ckks.h
+++ b/src/ckks.h
@@ -16,20 +16,21 @@ class CKKS {
     int q0 = 67;
     int pl[6] = {61, 67, 71, 73, 79, 67};
     int64_t ql;
-    PubKey pk;
     EvalKey evk;
-    SecKey sk;
     NTT ntt;
 
-    CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk);
+    CKKS(int N, EvalKey _evk);
+    Ciphertext encrypt(Polynomial pt, PubKey pk);
+    Polynomial decrypt(Ciphertext ct, SecKey sk);
+    Ciphertext mult(Ciphertext ct1, Ciphertext ct2);
+    Ciphertext multNTT(Ciphertext ct1, Ciphertext ct2);
+    Ciphertext add(Ciphertext ct1, Ciphertext ct2);
+    
+  private:
     Polynomial genE(int degree, double var);
     Polynomial genZO();
-    Ciphertext encrypt(Polynomial pt);
-    Polynomial decrypt(Ciphertext ct);
     Polynomial fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w);
-    Ciphertext add(Ciphertext ct1, Ciphertext ct2);
-    Ciphertext mult(Ciphertext ct1, Ciphertext ct2);
-    Ciphertext multNTT(Ciphertext ct1, Ciphertext ct2);
+    
 };
 
 #endif
\ No newline at end of file
diff --git a/src/encoder.cpp b/src/encoder.cpp
index 92de2c302e6aa28ca3ff7815bcc6b12741382076..5b26c755451ebe4fa16a216f20ddce1355f38d3e 100644
--- a/src/encoder.cpp
+++ b/src/encoder.cpp
@@ -41,7 +41,7 @@ Encoder::Encoder(int in, double inScale){
   vandermonde = vector<dcomplex>((M/2)*(M/2));
   sigmaRBasis = vector<dcomplex>((M/2)*(M/2));
   initVandermonde(root);
-  initSigmaRBasis();
+  VandermondeTranspose();
 }
 
 void Encoder::initVandermonde(dcomplex xi){
@@ -54,7 +54,7 @@ void Encoder::initVandermonde(dcomplex xi){
   }
 }
 
-void Encoder::initSigmaRBasis(){
+void Encoder::VandermondeTranspose(){
   int N = M/2;
   for(int i=0; i<N; i++){
     for(int j=0; j<N; j++){
diff --git a/src/encoder.h b/src/encoder.h
index ee3abf98d71afe618b0be69997dfb907d19c5b2a..269cd9527b4b5f595ed7de440fd533e6878065fa 100644
--- a/src/encoder.h
+++ b/src/encoder.h
@@ -22,24 +22,27 @@ struct dcomparr {
 #define ENCODER_H
 
 class Encoder {
-  public: 
+  private: 
     int M;
     double scale;
     dcomplex root;
     vector<dcomplex> vandermonde;
     vector<dcomplex> sigmaRBasis;
 
-    Encoder(int in, double inScale);
     void initVandermonde(dcomplex xi);
-    void initSigmaRBasis();
+    void VandermondeTranspose();
+
     dcomparr piInv(dcomparr input);
     dcomparr sigma(Polynomial pol);
-    dcomparr decode(Polynomial pol);
     dcomparr computeCoordinate(dcomparr z);
     dcomparr coordinateWRR(dcomparr coordinates);
     dcomparr discretization(dcomparr z);
     Polynomial sigmaInv(dcomparr z);
+  
+  public:
+    Encoder(int in, double inScale);
     Polynomial encode(dcomparr input);
+    dcomparr decode(Polynomial pol);
 };
 
 
diff --git a/src/evalkey.h b/src/evalkey.h
index 30a56592feafa1e6a912a6e4462710fc4396ae9a..0232559ce3886fbb3f45574dffa001c72dd79025 100644
--- a/src/evalkey.h
+++ b/src/evalkey.h
@@ -12,6 +12,8 @@ class EvalKey{
 
     EvalKey() = default;
     EvalKey(Polynomial _s, int degree, int64_t q);
+    
+  private:
     Polynomial genE(int degree, double var);
     void generateA(int degree, int64_t q);
     void computeB(int q);
diff --git a/src/ntt.cpp b/src/ntt.cpp
index bd15d650ae152283436ac80f5099a09b89a11083..f3aa4a7aaabbd5d971d615549331aace904c9f2d 100644
--- a/src/ntt.cpp
+++ b/src/ntt.cpp
@@ -3,6 +3,20 @@
 
 using namespace std;
 
+// Returns (a*b) % mod with C++ truncated-remainder sign, using double-and-add so the full
+// product a*b is never formed. Running values stay below 2*|mod| in magnitude, so
+// NOTE(review): mod presumably has to be below 2^62.
+int64_t NTT::modMul(int64_t a, int64_t b, int64_t mod){
+    if (b < 0){ a = -a; b = -b; }  // a negative b sticks at -1 under arithmetic >>= and never ends
+    int64_t res = 0;
+    for (a %= mod; b; b >>= 1){
+        if (b & 1) res = (res + a) % mod;
+        a = (a*2) % mod;
+    }
+    return res;
+}
+
+
 int64_t NTT::modExp(int64_t base, int64_t power, int64_t mod){
     int64_t res = 1;
     int64_t p = power;
@@ -96,7 +110,7 @@ void NTT::_ntt(vector<int64_t> &in, int64_t w){
     int nBit = bitLength(N) - 1;
     reverse(in, nBit);
 
-    vector<int> points(N, 0);
+    vector<int64_t> points(N, 0);
     for(int i=0; i<nBit; i++){
         vector<int64_t> p1;
         vector<int64_t> p2;
@@ -104,7 +118,7 @@ void NTT::_ntt(vector<int64_t> &in, int64_t w){
             int shift = nBit - i - 1;
             int64_t P = (j >> shift) << shift;
             int64_t wP = modExp(w, P, M);
-            int64_t odd = in[2*j+1] * wP;
+            int64_t odd = modMul(in[2*j+1], wP, M);
             int64_t even = in[2*j];
             p1.push_back((even + odd) % M);
             p2.push_back((even - odd) % M);
@@ -149,7 +163,7 @@ Polynomial NTT::intt(vector<int64_t> &in, int w){
 
     _ntt(in, wInv);
     for(int i=0; i<size(in); i++){
-        coeff[i] = (in[i] * nInv) % M;
+        coeff[i] = modMul(in[i], nInv, M);
     }
     pOut.setCoeffs(coeff);
     
diff --git a/src/ntt.h b/src/ntt.h
index 5078f1f82c2edb248ad84898dd70fc22fb95d45c..7196ebdd040526c2585c254c5bf26b97dccd0c1c 100644
--- a/src/ntt.h
+++ b/src/ntt.h
@@ -12,15 +12,19 @@ using namespace std;
 class NTT{
     public:
         int M = 2013265921;
+        
+        vector<int64_t> ntt(Polynomial in, int degree, int64_t w);
+        Polynomial intt(vector<int64_t> &in, int w);
+        static int64_t modMul(int64_t a, int64_t b, int64_t mod);
+        int64_t genNthRoot(int mod, int n);
+    
+    private:
         int64_t modExp(int64_t base, int64_t power, int64_t mod);
         int64_t modInv(int64_t x, int64_t mod);
         int bitLength(int x);
         void reverse(vector<int64_t> &in, int bitLen);
         bool existSmallerN(int r, int mod, int n);
-        int64_t genNthRoot(int mod, int n);
         void _ntt(vector<int64_t> &in, int64_t w);
-        vector<int64_t> ntt(Polynomial in, int degree, int64_t w);
-        Polynomial intt(vector<int64_t> &in, int w);
 };
 
 
diff --git a/src/polynomial.cpp b/src/polynomial.cpp
index 7c8225df1872b25cc8909e589181d5d848af4f43..36c8387a330bbbbba8d212c46e59cad85154ff4f 100644
--- a/src/polynomial.cpp
+++ b/src/polynomial.cpp
@@ -9,6 +9,7 @@
 
 using namespace std;
 
+
 Polynomial::Polynomial(int deg){
     degree = deg;
     coeffs = vector<double>(deg);
@@ -17,10 +18,21 @@ Polynomial::Polynomial(int deg){
     }
 }
 
-// Polynomial::~Polynomial(){
-//     //free(coeffs);
-//     coeffs = NULL;
-// }
+// Integer modular product of two coefficients: both doubles are truncated to int64_t and
+// multiplied by double-and-add; the result is (a*b) % mod with C++ truncated-remainder sign.
+double modMul(double in1, double in2, int64_t mod){
+    int64_t a = (int64_t) in1;
+    int64_t b = (int64_t) in2;
+    // A negative multiplier would never shift down to 0 (arithmetic >> sticks at -1),
+    // so move its sign onto the multiplicand first.
+    if (b < 0){ a = -a; b = -b; }
+    int64_t res = 0;
+    for (a %= mod; b; b >>= 1){
+        if (b & 1) res = (res + a) % mod;
+        a = (a*2) % mod;
+    }
+    return (double) res;
+}
 
 Polynomial::Polynomial(int deg, vector<double> coeff){
     degree = deg;
@@ -103,6 +115,24 @@ Polynomial Polynomial::operator + (Polynomial const &obj){
     return out;
 }
 
+// Schoolbook product of p1 and p2: every coefficient product goes through modMul and each
+// accumulated coefficient is reduced with fmod(., mod); the result has degree1+degree2-1 terms.
+Polynomial Polynomial::polyModMul(Polynomial p1, Polynomial p2, int64_t mod){
+    const int outLen = p1.degree + p2.degree - 1;
+    vector<double> acc(outLen, 0);
+
+    for(int lhs=0; lhs<p1.degree; ++lhs){
+        for(int rhs=0; rhs<p2.degree; ++rhs){
+            acc[lhs+rhs] += modMul(p1.coeffs[lhs], p2.coeffs[rhs], mod);
+        }
+    }
+    for(double &c : acc){
+        c = fmod(c, (double) mod);
+    }
+
+    return Polynomial(outLen, acc);
+}
+
 Polynomial Polynomial::operator * (Polynomial const &obj){
     int deg = degree + obj.degree -1;
     vector<double> coeff(deg, 0);
diff --git a/src/polynomial.h b/src/polynomial.h
index 7616785f891ee87b3f18f117bbe923c56a8f290d..dc39a371052ca3b252d26edc16ace10327d8fd88 100644
--- a/src/polynomial.h
+++ b/src/polynomial.h
@@ -15,7 +15,6 @@ class Polynomial {
     Polynomial() = default;
     Polynomial(int deg);
     Polynomial(int deg, vector<double> coeff);
-    //~Polynomial();
 
     void setDegree(int N);
     void setCoeffs(vector<double> coeffs);
@@ -23,11 +22,13 @@ class Polynomial {
     Polynomial scaleRoundCoeff(double scale);
     Polynomial modCoeff(int64_t q);
     Polynomial dot(Polynomial const &obj);
-    Polynomial fastMult(Polynomial p1, Polynomial p2);
     Polynomial operator + (Polynomial const &obj);
     Polynomial operator * (Polynomial const &obj);
+    
     void reduceDeg();
 
+    static Polynomial polyModMul(Polynomial p1, Polynomial p2, int64_t mod);
+
     void printPol();
 };
 
diff --git a/src/pubkey.cpp b/src/pubkey.cpp
index 7759da4af655fd7d6c83b033bb648c3008e6462f..0b526951e93ca1befa25eae13f65899a29127e9f 100644
--- a/src/pubkey.cpp
+++ b/src/pubkey.cpp
@@ -9,10 +9,9 @@ using namespace std;
 
 PubKey::PubKey(Polynomial _s, int degree, int64_t q){
     a = Polynomial(degree);
-    s = _s;
 
     generateA(degree, 1000);
-    computeB(q);
+    computeB(q, _s);
 }
 
 Polynomial PubKey::genE(int degree, double var){
@@ -43,7 +42,7 @@ void PubKey::generateA(int degree, int64_t q){
     a.setCoeffs(a_coeffs);
 }
 
-void PubKey::computeB(int q){
+void PubKey::computeB(int q, Polynomial s){
     Polynomial err = genE(a.degree, 1.0);
     Polynomial temp = (a.scaleCoeff(-1.0)) * s ;
     Polynomial res = temp.modCoeff(q);
diff --git a/src/pubkey.h b/src/pubkey.h
index 7929abcb3962a60dab07eabc3d3fa67585ab89f3..4135366220b9b2eaf3f6610d6ed24381238617a1 100644
--- a/src/pubkey.h
+++ b/src/pubkey.h
@@ -7,13 +7,12 @@ class PubKey{
   public:
     Polynomial a;
     Polynomial b;
-    Polynomial s;
 
     PubKey() = default;
     PubKey(Polynomial _s, int degree, int64_t q);
     Polynomial genE(int degree, double var);
     void generateA(int degree, int64_t q);
-    void computeB(int q);
+    void computeB(int q, Polynomial s);
 };
 
 #endif
\ No newline at end of file
diff --git a/src/seckey.h b/src/seckey.h
index 4fa51cc835de9e1d2986b668fdd8ab3b279136de..baed6331dc445a4aeee99760553ebfdf32a9ea0d 100644
--- a/src/seckey.h
+++ b/src/seckey.h
@@ -4,7 +4,6 @@
 #ifndef SECKEY_H
 #define SECKEY_H
 
-
 class SecKey{
   public:
     Polynomial s;
diff --git a/test.cpp b/test.cpp
index 3318bd644b9ddad7e86a54779218cd7fc7395dd1..e6e808199de421791a563d3070ff94ed4de663dd 100644
--- a/test.cpp
+++ b/test.cpp
@@ -94,13 +94,13 @@ int main(){
     //     SecKey sk = SecKey(N);
     //     PubKey pk(sk.s, (N)+1, ql);
     //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, pk, evk, sk);
+    //     CKKS ckks(N, evk);
 
     //     dcomparr in = randomFill(N);
         
     //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test);
-    //     Polynomial testOut = ckks.decrypt(ct);
+    //     Ciphertext ct = ckks.encrypt(test, pk);
+    //     Polynomial testOut = ckks.decrypt(ct, sk);
 
     //     dcomparr out = enc.decode(testOut);
     //     for(int j=0; j<N; j++){
@@ -112,34 +112,34 @@ int main(){
     //     if (error) break;
     // }
 
-    // for(int i=1; i<5; i++){
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
+    for(int i=7; i<8; i++){
+        int N = pow(2, i);
+        Encoder enc(N*4, 64.0);
 
-    //     SecKey sk = SecKey(N);
-    //     PubKey pk(sk.s, (N)+1, ql);
-    //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, pk, evk, sk);
+        SecKey sk = SecKey(N);
+        PubKey pk(sk.s, (N)+1, ql);
+        EvalKey evk(sk.s, (N)+1, ql);
+        CKKS ckks(N, evk);
 
-    //     dcomparr in = randomFill(N);
+        dcomparr in = randomFill(N);
         
-    //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test);
-    //     Ciphertext ctadd = ckks.add(ct,ct);
-    //     Polynomial testOut = ckks.decrypt(ctadd);
-
-    //     dcomparr out = enc.decode(testOut);
-    //     dcomplex scale(2.0, 0.0);
-    //     for(int j=0; j<N; j++){
-    //         if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
+        Polynomial test = enc.encode(in);
+        Ciphertext ct = ckks.encrypt(test, pk);
+        Ciphertext ctadd = ckks.add(ct,ct);
+        Polynomial testOut = ckks.decrypt(ctadd, sk);
+
+        dcomparr out = enc.decode(testOut);
+        dcomplex scale(2.0, 0.0);
+        for(int j=0; j<N; j++){
+            if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){
+                error = true;
+                break;
+            }
+        }
+        if (error) break;
+    }
 
-    // for(int i=1; i<5; i++){
+    // for(int i=7; i<9; i++){
     //     int N = pow(2, i);
     //     Encoder enc(N*4, 64.0);
 
@@ -161,6 +161,8 @@ int main(){
 
     //     for(int j=0; j<N*2; j++){
     //         if (abs(check.arr[j]-out.arr[j]) > 2.0){
+    //             ctadd.c0.printPol();
+    //             ctadd.c1.printPol();
     //             error = true;
     //             break;
     //         }
@@ -168,21 +170,21 @@ int main(){
     //     if (error) break;
     // }
 
-    // for(int i=1; i<6; i++){
+    // for(int i=7; i<8; i++){
     //     int N = pow(2, i);
     //     Encoder enc(N*4, 64.0);
 
     //     SecKey sk = SecKey(N);
     //     PubKey pk(sk.s, (N)+1, ql);
     //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, pk, evk, sk);
+    //     CKKS ckks(N, evk);
 
     //     dcomparr in = randomFill(N);
         
     //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test);
+    //     Ciphertext ct = ckks.encrypt(test, pk);
     //     Ciphertext ctadd = ckks.multNTT(ct,ct);
-    //     Polynomial testOut = ckks.decrypt(ctadd);
+    //     Polynomial testOut = ckks.decrypt(ctadd, sk);
 
     //     Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0);
     //     dcomparr out = enc.decode(testOut);
diff --git a/test.o b/test.o
index 0e43df3c066fa9c513bed0331216e418988b32f9..7c653c930a6ede35e85d127617ac2ebc128c68ef 100644
Binary files a/test.o and b/test.o differ