diff --git a/bin/encoder.o b/bin/encoder.o
index 2d66894c23caf0e68bae302f3dbb767e7abab111..93f732aa58952280e9321ea171eec27dab42d68c 100644
Binary files a/bin/encoder.o and b/bin/encoder.o differ
diff --git a/bin/ntt.o b/bin/ntt.o
index 29792475d4283efe7e25daa09b58554e1707f568..ff7369a84e538a9bab531fb8eeda61cd4c2042db 100644
Binary files a/bin/ntt.o and b/bin/ntt.o differ
diff --git a/cuda/bin/ciphertext.o b/cuda/bin/ciphertext.o
index 1580a51b7f93fb54a277011c86dcc99d618d4511..0dbbf06bc3fccbf33e11c2f14e319f5a3ee5f607 100644
Binary files a/cuda/bin/ciphertext.o and b/cuda/bin/ciphertext.o differ
diff --git a/cuda/bin/ckks.o b/cuda/bin/ckks.o
index 9f27a6dc1bbf9649000510f97701b3f18856fee6..1f6d1b586906392db61a50a2cf6a28a707706ecd 100644
Binary files a/cuda/bin/ckks.o and b/cuda/bin/ckks.o differ
diff --git a/cuda/bin/evalkey.o b/cuda/bin/evalkey.o
index 0a421c59846a55e67e31b07ed0a2dd8355212af2..9ad9294ce903a9120383f94e83cf1b1adfd81dc5 100644
Binary files a/cuda/bin/evalkey.o and b/cuda/bin/evalkey.o differ
diff --git a/cuda/bin/ntt.o b/cuda/bin/ntt.o
index d27e85ab1b9dc1e1cf10aa98ac7e9aa7c9406a1a..e3479d5295d83abe5e251fde0df36dfdbd863867 100644
Binary files a/cuda/bin/ntt.o and b/cuda/bin/ntt.o differ
diff --git a/cuda/bin/parallel.o b/cuda/bin/parallel.o
index 234ae9b2240064456b6c9238a3dc960e0723571d..ca5019f77d0231e6cee231b0ebb7eed377c64fc8 100644
Binary files a/cuda/bin/parallel.o and b/cuda/bin/parallel.o differ
diff --git a/cuda/bin/polynomial.o b/cuda/bin/polynomial.o
index 3f3e4a8de9a7f0237b2050621639ae91ddb7fc2e..226795151d3eb1e578bbb56c34e8890eb8e77654 100644
Binary files a/cuda/bin/polynomial.o and b/cuda/bin/polynomial.o differ
diff --git a/cuda/bin/pubkey.o b/cuda/bin/pubkey.o
index c85cfc0bae7cff1d80e94b823755b80e80e88cb4..7af577e01f0b25ed2b04e15e0534fde8a5e1411e 100644
Binary files a/cuda/bin/pubkey.o and b/cuda/bin/pubkey.o differ
diff --git a/cuda/bin/seckey.o b/cuda/bin/seckey.o
index dececb994b5bcc125b327f05e1c465d451ed0c45..7a26dfbffd75e14bb1e892a68d112eef482e21bb 100644
Binary files a/cuda/bin/seckey.o and b/cuda/bin/seckey.o differ
diff --git a/cuda/ckks.cu b/cuda/ckks.cu
index 2570db100f31ada490311c8e285abe020f4469d6..ff47c80a5c9bdf28f501b9c586bb1bd9181e33bf 100644
--- a/cuda/ckks.cu
+++ b/cuda/ckks.cu
@@ -3,18 +3,16 @@
 
 using namespace std;
 
-CKKS::CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk){
-  pk = _pk;
-  sk = _sk;
+CKKS::CKKS(EvalKey _evk){
   evk = _evk;
-  deg = N;
   ql = q0;
   for (int i=0; i<6; i++){
     ql *= pl[i];
   }
 }
 
-Ciphertext CKKS::encrypt(Polynomial pt){
+
+Ciphertext CKKS::encrypt(Polynomial pt, PubKey pk){
   //Polynomial e = genE(pt.degree, 1.0);
   Polynomial c0 = (pt) + pk.b;
   Polynomial c1 = pk.a;
@@ -23,7 +21,7 @@ Ciphertext CKKS::encrypt(Polynomial pt){
   return ct;
 }
 
-Polynomial CKKS::decrypt(Ciphertext ct){
+Polynomial CKKS::decrypt(Ciphertext ct, SecKey sk){
   Polynomial pt = ct.c0 + (ct.c1 * sk.s);
   return pt.modCoeff(ql);
 }
@@ -33,6 +31,20 @@ Ciphertext CKKS::add(Ciphertext ct1, Ciphertext ct2){
   return out;
 }
 
+Polynomial CKKS::fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w){
+  vector<int64_t> pol1 = ntt.ntt(p1, deg, w);
+  vector<int64_t> pol2 = ntt.ntt(p2, deg, w);
+  
+  int64_t p = 4179340454199820289;
+  vector<int64_t> mult(deg, 0);
+  for(int i=0; i<deg; i++){
+    mult.push_back(NTT::modMul(pol1[i], pol2[i], p));
+  }
+
+  Polynomial out = ntt.intt(mult, w);
+  return out;
+}
+
 Ciphertext CKKS::mult(Ciphertext ct1, Ciphertext ct2){
   // Multiplication
   Polynomial d1 = (ct1.c0 * ct2.c0).modCoeff(ql);
@@ -55,4 +67,18 @@ Ciphertext CKKS::mult(Ciphertext ct1, Ciphertext ct2){
   
   Ciphertext out(c0.modCoeff(ql), c1.modCoeff(ql));
   return out;
+}
+
+int getNextPow(int deg){
+  int out = 2;
+  while(out < deg){
+    out <<= 1;
+  }
+  return out;
+}
+
+Ciphertext CKKS::multNTT(Ciphertext ct1, Ciphertext ct2){
+  level += 1;
+  Ciphertext out = mult(ct1, ct2);
+  return out;
 }
\ No newline at end of file
diff --git a/cuda/ckks.h b/cuda/ckks.h
index d4ae662e15d8acff8b45cbf98d2f7cd24126286c..87edf3f33ecedbda476fe8f3e6b10f0708de0456 100644
--- a/cuda/ckks.h
+++ b/cuda/ckks.h
@@ -1,4 +1,5 @@
 #include "polynomial.h"
+#include "ntt.h"
 #include "pubkey.h"
 #include "seckey.h"
 #include "evalkey.h"
@@ -8,22 +9,22 @@
 #define CKKS_H
 
 class CKKS {
-  public:
-    int deg;
+  private:
     int level = 6;
     int q0 = 67;
-    int pl[6] = {61, 67, 71, 73, 79, 59};
+    int pl[6] = {61, 67, 71, 73, 79, 67};
     int64_t ql;
-    PubKey pk;
-    SecKey sk;
     EvalKey evk;
+    NTT ntt;
 
-    CKKS(int N, PubKey _pk, EvalKey _evk, SecKey _sk);
-    Polynomial fastMult(Polynomial p1, Polynomial p2);
+  public:
+    CKKS(EvalKey _evk);
+    Polynomial fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w);
     Ciphertext add(Ciphertext ct1, Ciphertext ct2);
     Ciphertext mult(Ciphertext ct1, Ciphertext ct2);
-    Ciphertext encrypt(Polynomial pt);
-    Polynomial decrypt(Ciphertext ct);
+    Ciphertext multNTT(Ciphertext ct1, Ciphertext ct2);
+    Ciphertext encrypt(Polynomial pt, PubKey pk);
+    Polynomial decrypt(Ciphertext ct, SecKey sk);
 };
 
 #endif
\ No newline at end of file
diff --git a/cuda/evalkey.cu b/cuda/evalkey.cu
index c2fbe958b78cf9a526b7ceeb571bd9660881c4f2..7156c5c4ec09468d00e9e767bca1201bad7e6817 100644
--- a/cuda/evalkey.cu
+++ b/cuda/evalkey.cu
@@ -29,10 +29,10 @@ Polynomial EvalKey::genE(int degree, double var){
 }
 
 void EvalKey::generateA(int degree, int64_t q){
-    int64_t half_q = (p*q)/2;
+    int64_t half_q = (q)/2;
     random_device rd;
     mt19937 gen(rd());
-    uniform_int_distribution<int64_t> dis(-half_q, 0);
+    uniform_int_distribution<int64_t> dis(0, half_q);
     double a_coeffs[degree];
     
     for (int i=0; i<degree; i++){
@@ -45,9 +45,10 @@ void EvalKey::generateA(int degree, int64_t q){
 void EvalKey::computeB(int q, Polynomial s){
     Polynomial err = genE(a.degree, 1.0);
     Polynomial ev = (s * s).scaleCoeff(p);
-    Polynomial temp = ((a.scaleCoeff(-1.0)) * s) + ev + err;
+    Polynomial temp = ((a.scaleCoeff(-1.0)) * s) + ev;
     Polynomial res = temp.modCoeff(q * p);
 
     b = Polynomial(res.degree);
     b.setCoeffs(res.coeffs);
+    
 }
\ No newline at end of file
diff --git a/cuda/ntt.cu b/cuda/ntt.cu
index b45b53496c9a26ad19908c5d9dfa7f1d38a141aa..7c89eb9cc4adfd21e5f5902a275252c2df222100 100644
--- a/cuda/ntt.cu
+++ b/cuda/ntt.cu
@@ -1,4 +1,7 @@
 #include "ntt.h"
+#include <chrono>
+
+using namespace std::chrono;
 
 __device__ int64_t devModExp(int64_t base, int64_t power, int64_t mod){
     int64_t res = 1;
@@ -108,7 +111,7 @@ int64_t NTT::genNthRoot(int mod, int n){
 }
 
 void NTT::reverse(vector<int64_t> &in, int bitLen){
-    for (int i=0; i<size(in); i++){
+    for (int i=0; i<in.size(); i++){
         int64_t revN = 0;
         for(int j=0; j<bitLen ; j++){
             if ((i >> j) & 1){
@@ -145,8 +148,22 @@ __global__ void correctNeg(int64_t* points, int M){
     if(points[idx] < 0) points[idx] += M;
 }
 
+int64_t NTT::modMul(int64_t a, int64_t b, int64_t mod){
+    int64_t res = 0;
+    a = a % mod;
+    while(b){
+        if (b & 1){
+            res = (res + a) % mod;
+        }
+        a = (a*2) % mod;
+        b >>= 1;
+    }
+    
+    return res;
+}
+
 void NTT::_ntt(vector<int64_t> &in, int64_t w){
-    int N = size(in);
+    int N = in.size();
     int nBit = bitLength(N) - 1;
     reverse(in, nBit);
 
@@ -159,12 +176,19 @@ void NTT::_ntt(vector<int64_t> &in, int64_t w){
 
     int64_t* points;
     cudaMalloc((void**)&points, N*sizeof(int64_t));
+
+    int block = 1;
+    int threadPerBlock = N/2;
+    if(N/2 > 1024){
+        threadPerBlock = 1024;
+        block = ceil((double)(N/2)/1024.0);
+    }
     
     for(int i=0; i<nBit; i++){
         int64_t* p1;
         cudaMalloc((void**)&p1, N*sizeof(int64_t));
 
-        computeNTT<<<1, N/2>>>(p1, N, i, w, nBit, input);
+        computeNTT<<<block, threadPerBlock>>>(p1, N, i, w, nBit, input);
         cudaDeviceSynchronize();
 
         cudaMemcpy(points, p1, N*sizeof(int64_t), cudaMemcpyDeviceToDevice);
@@ -176,10 +200,13 @@ void NTT::_ntt(vector<int64_t> &in, int64_t w){
         cudaFree(p1);
     }
 
-    correctNeg<<<1, N>>>(points, 2013265921);
+    correctNeg<<<block*2, threadPerBlock>>>(points, 2013265921);
     cudaMemcpy(temp, points, N*sizeof(int64_t), cudaMemcpyDeviceToHost);
 
-    for(int i=0; i<N; i++) in[i] = temp[i];
+    for(int i=0; i<N; i++) {
+        in[i] = temp[i];
+    }
+
 }
 
 vector<int64_t> NTT::ntt(Polynomial in, int degree, int64_t w){
@@ -191,21 +218,22 @@ vector<int64_t> NTT::ntt(Polynomial in, int degree, int64_t w){
     for(int i=0; i<in.degree; i++){
         out[i] = (int64_t) input[i];
     }
-    
+
     _ntt(out, w);
     return out;
 }
 
 Polynomial NTT::intt(vector<int64_t> &in, int w){
-    int N = size(in);
+    int N = in.size();
     double* coeff = (double*) malloc(N*sizeof(double));
 
     int64_t wInv = modInv(w, M);
     int64_t nInv = modInv(N, M);
 
     _ntt(in, wInv);
-    for(int i=0; i<size(in); i++){
-        coeff[i] = (in[i] * nInv) % M;
+
+    for(int i=0; i<in.size(); i++){
+        coeff[i] = modMul(in[i], nInv, M);
     }
 
     Polynomial pOut(N, coeff);
diff --git a/cuda/ntt.h b/cuda/ntt.h
index 5078f1f82c2edb248ad84898dd70fc22fb95d45c..809cc3ee9dcc7b7f365156c075b2673f39afd83d 100644
--- a/cuda/ntt.h
+++ b/cuda/ntt.h
@@ -4,7 +4,7 @@
 
 using namespace std;
 
-#define size(c) ((int)(c).size())
+
 
 #ifndef NTT_H
 #define NTT_H
@@ -12,15 +12,19 @@ using namespace std;
 class NTT{
     public:
         int M = 2013265921;
+        
+        vector<int64_t> ntt(Polynomial in, int degree, int64_t w);
+        Polynomial intt(vector<int64_t> &in, int w);
+        static int64_t modMul(int64_t a, int64_t b, int64_t mod);
+        int64_t genNthRoot(int mod, int n);
+    
+    private:
         int64_t modExp(int64_t base, int64_t power, int64_t mod);
         int64_t modInv(int64_t x, int64_t mod);
         int bitLength(int x);
         void reverse(vector<int64_t> &in, int bitLen);
         bool existSmallerN(int r, int mod, int n);
-        int64_t genNthRoot(int mod, int n);
         void _ntt(vector<int64_t> &in, int64_t w);
-        vector<int64_t> ntt(Polynomial in, int degree, int64_t w);
-        Polynomial intt(vector<int64_t> &in, int w);
 };
 
 
diff --git a/cuda/parallel.cu b/cuda/parallel.cu
index 29d587b9e64c28b0b32d26a4729b96d8ecb586f0..ced1c066d04e4693987ee47c5469fdf99e9965d5 100644
--- a/cuda/parallel.cu
+++ b/cuda/parallel.cu
@@ -30,9 +30,9 @@ __device__ cuDoubleComplex power(cuDoubleComplex base, int power){
 __global__ void initVandermonde(cuDoubleComplex *vand, int M, cuDoubleComplex xi){
   int ROW = blockIdx.y*blockDim.y+threadIdx.y;
   int COL = blockIdx.x*blockDim.x+threadIdx.x;
+
   int exp = (2*ROW+1)*COL;
   vand[ROW * M + COL] = power(xi, exp);
-  if(ROW == 0 && COL == 0) printf("test = %f\n", cuCreal(power(xi, exp)));
 }
 
 __global__ void transpose(cuDoubleComplex *dest, cuDoubleComplex *source, int N){
@@ -67,6 +67,23 @@ __global__ void computeCoord(double *res, cuDoubleComplex *target, cuDoubleCompl
   res[idx] = temp;
 }
 
+__global__ void computeLargeCoord(double *res, cuDoubleComplex *target, cuDoubleComplex xi, int M){
+  int idx = threadIdx.x + blockIdx.x*blockDim.x;
+  double temp;
+
+  cuDoubleComplex a = make_cuDoubleComplex(0.0,0.0);
+  cuDoubleComplex b = make_cuDoubleComplex(0.0,0.0);
+  for(int i=0; i<M; i++){
+    int exp = (2*i+1) * idx;
+    cuDoubleComplex vandBase = power(xi, exp);
+    a = cuCadd(a, cuCmul(vandBase, cuConj(target[i])));
+    b = cuCadd(b, cuCmul(vandBase, cuConj(vandBase)));
+  }
+
+  temp = cuCreal(a)/cuCreal(b);
+  res[idx] = temp;
+}
+
 __global__ void setup_kernel(curandState *state){
   int idx = threadIdx.x+blockDim.x*blockIdx.x;
   curand_init(1234, idx, 0, &state[idx]);
@@ -83,12 +100,23 @@ __global__ void unscale(double *target, double scale){
 }
 
 
-__global__ void sigma(double *pol, cuDoubleComplex *vand, int M, cuDoubleComplex *target){
+__global__ void sigma(double *pol, cuDoubleComplex *vand, int M, int offset, cuDoubleComplex *target){
   int idx = threadIdx.x + blockIdx.x*blockDim.x;
   target[idx] = make_cuDoubleComplex(0.0, 0.0);
+
   for(int i=0; i<M; i++){
-    target[idx].x += vand[idx*M+i].x * pol[i];
-    target[idx].y += vand[idx*M+i].y * pol[i];
+    target[idx].x += vand[idx*offset+i].x * pol[i];
+    target[idx].y += vand[idx*offset+i].y * pol[i];
+  }
+}
+
+__global__ void largeSigma(double *pol, cuDoubleComplex xi, int M, cuDoubleComplex *target){
+  int idx = threadIdx.x + blockIdx.x*blockDim.x;
+  target[idx] = make_cuDoubleComplex(0.0, 0.0);
+  for(int i=0; i<M; i++){
+    cuDoubleComplex x = power(xi, (2*idx+1)*i);
+    target[idx].x += x.x * pol[i];
+    target[idx].y += x.y * pol[i];
   }
 }
 
@@ -110,17 +138,20 @@ Encoder::Encoder(int Min, double s){
     blocksPerGrid.y = ceil(double(N)/double(threadsPerBlock.y));
   }
 
-  initVandermonde<<<blocksPerGrid, threadsPerBlock>>>(vand, M/2, xi);
-  cudaDeviceSynchronize();
+  if(M/4 <= pow(2,12)){
+    initVandermonde<<<blocksPerGrid, threadsPerBlock>>>(vand, M/2, xi);
+    cudaDeviceSynchronize();
 
-  dcomplex *test = (dcomplex *) malloc(64*64*sizeof(dcomplex));
-  cudaMemcpy(test, vand, 64*64*sizeof(dcomplex), cudaMemcpyDeviceToHost);
+    dcomplex *test = (dcomplex *) malloc(64*64*sizeof(dcomplex));
+    cudaMemcpy(test, vand, 64*64*sizeof(dcomplex), cudaMemcpyDeviceToHost);
 
-  cudaMalloc((void**)&sigmaRBasis, N*N*sizeof(cuDoubleComplex));
-  transpose<<<blocksPerGrid, threadsPerBlock>>>(sigmaRBasis, vand, M/2);
+    cudaMalloc((void**)&sigmaRBasis, N*N*sizeof(cuDoubleComplex));
+    transpose<<<blocksPerGrid, threadsPerBlock>>>(sigmaRBasis, vand, M/2);
+  } 
 }
 
 Polynomial Encoder::encode(dcomparr input){
+  
   cuDoubleComplex *target;
   size_t size = M/2;
   Polynomial out(M/2);
@@ -128,26 +159,37 @@ Polynomial Encoder::encode(dcomparr input){
   cudaMalloc((void**) &target, size * sizeof(cuDoubleComplex));
   cudaMemcpy(target, input.arr, M/4 * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
 
+  auto start = high_resolution_clock::now();
+
   int block = 1;
   int threadPerBlock = M/2;
   if(threadPerBlock > 1024){
     threadPerBlock = 1024;
-    block = ceil((double)(M/2)/1024.0);
+    block = ceil((double)(M/2)/1024.0);  
   }
 
-  auto start = high_resolution_clock::now();
   expScale<<<block,threadPerBlock/2>>>(target, M/2, scale);
 
   double *res;
   cudaMalloc((void**)&res, M/2*sizeof(double));
-  computeCoord<<<block,threadPerBlock>>>(res, target, sigmaRBasis, M/2);
 
-  coordWRR<<<block,threadPerBlock>>>(res);
+  if(M/4 <= pow(2,12)){
+    computeCoord<<<block,threadPerBlock>>>(res, target, sigmaRBasis, M/2);
+  }else {
+    root = exp((2*M_PI/M) * J);
+    cuDoubleComplex xi = make_cuDoubleComplex(real(root), imag(root));
+    computeLargeCoord<<<block,threadPerBlock>>>(res, target, xi, M/2);
+  }
 
+  coordWRR<<<block,threadPerBlock>>>(res);
   out.setCoeffs(res);
+
   auto stop = high_resolution_clock::now();
   auto duration = duration_cast<microseconds>(stop - start);
-  cout << "Encoding: " << duration.count() << endl;
+  // cout << "Encoding: " << duration.count() << endl;
+
+  cudaFree(res);
+  cudaFree(target);
   return out;
 }
 
@@ -155,7 +197,7 @@ dcomparr Encoder::decode(Polynomial pt){
   auto start = high_resolution_clock::now();
   
   int block = 1;
-  int threadPerBlock = M/2;
+  int threadPerBlock = pt.degree;
   if(threadPerBlock > 1024){
     threadPerBlock = 1024;
     block = ceil((double)(M/2)/1024.0);
@@ -165,15 +207,56 @@ dcomparr Encoder::decode(Polynomial pt){
 
   cuDoubleComplex *target;
   cudaMalloc((void**)&target, M/4 * sizeof(cuDoubleComplex));
-  sigma<<<block,threadPerBlock/2>>>(pt.coeffs, vand, M/2, target);
+  
+  if(M/4 <= pow(2,12)){
+    if(pt.degree == M/2){
+      sigma<<<block,threadPerBlock/2>>>(pt.coeffs, vand, M/2, M/2, target);
+    } else {
+      int R = 2;
+      while (R < pt.degree){
+        R <<=1;
+      }
+
+      cuDoubleComplex *tempVand;
+      cudaMalloc((void**)&tempVand, R*R*sizeof(cuDoubleComplex));
+
+      cuDoubleComplex xi = make_cuDoubleComplex(real(root), imag(root));
+      dim3 threads(R, R);
+      dim3 blocksPerGrid(1, 1);
+  
+      if (R*R > 512){
+        threads.x = 16;
+        threads.y = 16;
+        blocksPerGrid.x = ceil(double(R)/double(threads.x));
+        blocksPerGrid.y = ceil(double(R)/double(threads.y));
+      }
+
+      initVandermonde<<<blocksPerGrid, threads>>>(tempVand, R, xi);
+
+      threadPerBlock = M/2;
+      if(threadPerBlock > 1024){
+        threadPerBlock = 1024;
+        block = ceil((double)(M/2)/1024.0);
+      }
+
+      sigma<<<block,threadPerBlock/2>>>(pt.coeffs, tempVand, pt.degree, R, target);
+      cudaFree(tempVand);
+    }
+    
+  }else {
+    cuDoubleComplex xi = make_cuDoubleComplex(real(root), imag(root));
+    largeSigma<<<block,threadPerBlock/2>>>(pt.coeffs, xi, M/2, target);
+  }
+
+  auto stop = high_resolution_clock::now();
+  auto duration = duration_cast<microseconds>(stop - start);
+  // cout << "Decoding: " << duration.count() << endl;
 
   dcomparr output(M/4);
   size_t size_out = (M/4)*sizeof(dcomplex);
   cudaMemcpy(output.arr, target, size_out, cudaMemcpyDeviceToHost);
 
-  auto stop = high_resolution_clock::now();
-  auto duration = duration_cast<microseconds>(stop - start);
-  cout << "Decoding: " << duration.count() << endl;
+  cudaFree(target);
+
   return output;
 }
-
diff --git a/cuda/polynomial.cu b/cuda/polynomial.cu
index 8f5944cddeb19130950fb94caabe2bb79b0bd82b..bf07b52b47aa26f186336fef3ebeb18aa988a562 100644
--- a/cuda/polynomial.cu
+++ b/cuda/polynomial.cu
@@ -31,11 +31,11 @@ __global__ void polyRoundScale(double *res, double *in, double scale){
     res[idx] = (double) round(in[idx] * scale);
 }
 
-__global__ void polyMult(double *res, double *a, double *b, int deg){
+__global__ void polyMult(double *res, double *a, double *b, int degA, int degB){
     int idx = threadIdx.x;
-    res[idx] = 0;
-    for(int i=0; i<deg; i++){
-        for(int j=0; j<deg; j++){
+    res[idx] = 0.0;
+    for(int i=0; i<degA; i++){
+        for(int j=0; j<degB; j++){
             if(j+i == idx){
                 res[idx] += a[i] * b[j];
             }
@@ -62,7 +62,15 @@ Polynomial Polynomial::operator + (Polynomial const &obj){
     int deg = max(obj.degree, degree);
     double* resCoeff;
     cudaMalloc((void**)&resCoeff, deg*sizeof(double));
-    polyAdd<<<1,deg>>>(coeffs, degree, obj.coeffs, obj.degree, resCoeff);
+    
+    int block = 1;
+    int threadPerBlock = deg;
+    if(threadPerBlock > 1024){
+        threadPerBlock = 1024;
+        block = ceil((double)(deg)/1024.0);
+    }
+
+    polyAdd<<<block,threadPerBlock>>>(coeffs, degree, obj.coeffs, obj.degree, resCoeff);
     
     Polynomial out(deg);
     out.setCoeffs(resCoeff);
@@ -70,12 +78,21 @@ Polynomial Polynomial::operator + (Polynomial const &obj){
 }
 
 Polynomial Polynomial::operator * (Polynomial const &obj){
-    int deg = obj.degree + degree;
+    int deg = obj.degree + degree-1;
     double *resCoeff;
     cudaMalloc((void**)&resCoeff, deg*sizeof(double));
-    polyMult<<<1, deg>>>(resCoeff, coeffs, obj.coeffs, deg);
+    
+    int block = 1;
+    int threadPerBlock = deg;
+    if(threadPerBlock > 1024){
+        threadPerBlock = 1024;
+        block = ceil((double)(deg)/1024.0);
+    }
+
+    polyMult<<<block,threadPerBlock>>>(resCoeff, coeffs, obj.coeffs, degree, obj.degree);
     Polynomial out(deg);
     out.setCoeffs(resCoeff);
+
     return out;
 }
 
@@ -87,7 +104,15 @@ Polynomial Polynomial::modCoeff(int q){
     Polynomial out(degree);
     double *resCoeff;
     cudaMalloc((void**)&resCoeff, degree*sizeof(double));
-    polyMod<<<1, degree>>>(resCoeff, coeffs, q);
+
+    int block = 1;
+    int threadPerBlock = degree;
+    if(threadPerBlock > 1024){
+        threadPerBlock = 1024;
+        block = ceil((double)(degree)/1024.0);
+    }
+
+    polyMod<<<block,threadPerBlock>>>(resCoeff, coeffs, q);
     out.setCoeffs(resCoeff);
     return out;
 }
@@ -96,7 +121,15 @@ Polynomial Polynomial::scaleCoeff(double scale){
     Polynomial out(degree);
     double *resCoeff;
     cudaMalloc((void**)&resCoeff, degree*sizeof(double));
-    polyScale<<<1, degree>>>(resCoeff, coeffs, scale);
+
+    int block = 1;
+    int threadPerBlock = degree;
+    if(threadPerBlock > 1024){
+        threadPerBlock = 1024;
+        block = ceil((double)(degree)/1024.0);
+    }
+
+    polyScale<<<block,threadPerBlock>>>(resCoeff, coeffs, scale);
     out.setCoeffs(resCoeff);
     return out;
 }
@@ -105,7 +138,15 @@ Polynomial Polynomial::scaleRoundCoeff(double scale){
     Polynomial out(degree);
     double *resCoeff;
     cudaMalloc((void**)&resCoeff, degree*sizeof(double));
-    polyRoundScale<<<1, degree>>>(resCoeff, coeffs, scale);
+
+    int block = 1;
+    int threadPerBlock = degree;
+    if(threadPerBlock > 1024){
+        threadPerBlock = 1024;
+        block = ceil((double)(degree)/1024.0);
+    }
+
+    polyRoundScale<<<block,threadPerBlock>>>(resCoeff, coeffs, scale);
     out.setCoeffs(resCoeff);
     return out;
 }
diff --git a/cuda/pubkey.cu b/cuda/pubkey.cu
index 9e9dbab090d5adcfa98b2332e63f9bcb37821bf4..580da10540557638e777a30c4ed422176373f28e 100644
--- a/cuda/pubkey.cu
+++ b/cuda/pubkey.cu
@@ -30,7 +30,7 @@ void PubKey::generateA(int degree, int64_t q){
     int64_t half_q = q/2;
     random_device rd;
     mt19937 gen(rd());
-    uniform_int_distribution<int64_t> dis(-half_q, 0);
+    uniform_int_distribution<int64_t> dis(0, half_q);
     double a_coeffs[degree];
     
     for (int i=0; i<degree; i++){
@@ -41,9 +41,10 @@ void PubKey::generateA(int degree, int64_t q){
 
 void PubKey::computeB(int q, Polynomial s){
     Polynomial err = genE(a.degree, 1.0);
-    Polynomial temp = (a.scaleCoeff(-1.0)) * s + err;
+    Polynomial temp = (a.scaleCoeff(-1.0)) * s;
     Polynomial res = temp.modCoeff(q);
 
+
     b = Polynomial(res.degree);
     b.setCoeffs(res.coeffs);
 }
diff --git a/cuda/run b/cuda/run
index 63310b5b9d6a41736086824680060af5cae67974..35cba99a8edbc6cf26fd1bd7d15e950ff0030216 100755
Binary files a/cuda/run and b/cuda/run differ
diff --git a/cuda/seckey.cu b/cuda/seckey.cu
index d0efd3915d2130c7dc3c2ea2a5065ce2c084e7b3..1636c661e35b5b2e81791b2f179ddc6f21ca7485 100644
--- a/cuda/seckey.cu
+++ b/cuda/seckey.cu
@@ -4,15 +4,22 @@
 #include <random>
 #include "seckey.h"
 
+#include <chrono>
+using namespace std::chrono;
+
+
 using namespace std;
 
 SecKey::SecKey(int deg){
     double coeff[deg];
+    auto start1 = high_resolution_clock::now();
 
     std::random_device rd; 
     std::mt19937 gen(rd()); 
     std::uniform_int_distribution<> distr(deg/2, deg);
 
+    
+
     int h = distr(gen);
     string ones(h, '1');
     string zeros((deg-h), '0');
@@ -21,9 +28,20 @@ SecKey::SecKey(int deg){
     random_shuffle(key.begin(), key.end());
 
     for(int i=0; i<deg; i++){
-        char temp = key.at(i);
-        coeff[i] = (double) (atoi(&temp));
+        char temp = key[i];
+        int coef = (int(temp)-48);
+
+        if(coef != 0){
+            coeff[i] = (double) -1 * coef;
+        } else {
+            coeff[i] = 0.0;
+        }
+        
     }
 
+    auto stop1 = high_resolution_clock::now();
+    auto duration1 = duration_cast<microseconds>(stop1 - start1);
+    cout << "secret key: " << duration1.count() << endl;
+
     s = Polynomial(deg, coeff);
 }
\ No newline at end of file
diff --git a/cuda/temp.cu b/cuda/temp.cu
new file mode 100644
index 0000000000000000000000000000000000000000..292e6429246ee2097f7f04312e5a8418188920ef
--- /dev/null
+++ b/cuda/temp.cu
@@ -0,0 +1,298 @@
+    // Encoder enc(128, 128.0);
+    // dcomplex a = 1.0 + 1.0 *J; 
+    // dcomparr input;
+
+
+
+    int q0 = 67;
+    int pl[6] = {61, 67, 71, 73, 79, 61};
+    int64_t ql = q0;
+    for (int i=0; i<5; i++){
+        ql *= pl[i];
+    }
+
+    int N = pow(2,1);
+    double scale = 64.0;
+    Encoder enc(4*N, scale);
+    dcomplex a1 = 3.0 + 4.0 *J;
+    dcomplex a2 = 2.0 - 1.0 *J; 
+    dcomparr input(N);
+    input.arr[0] = a1;
+    input.arr[1] = a2;
+
+    dcomparr input2(N);
+    input2.arr[0] = 1.0 + 1.0*J;
+    input2.arr[1] = 1.0 + 2.0*J;
+
+    Polynomial pt1 = enc.encode(input2);
+    // pt1.printPol();
+
+    auto start = high_resolution_clock::now();
+    Polynomial pt = enc.encode(input);
+    auto stop = high_resolution_clock::now();
+    auto duration = duration_cast<microseconds>(stop - start);
+    pt.printPol();
+
+    // dcomparr output = enc.decode(pt);
+    // for(int i=0; i<N; i++){
+    //     cout << output.arr[i] << " ";
+    // }
+    // cout << endl;
+
+    // double realN = 0;
+    // double imagN = 0;
+
+    // for(int i=0; i<N; i++){
+    //     realN += pow(abs(real(output.arr[i]) - real(input.arr[i])), 2);
+    //     imagN += pow(abs(imag(output.arr[i]) - imag(input.arr[i])), 2);
+    // }
+
+    // double average = (realN+imagN)/N;
+    // average = pow(average, 0.5);
+    // cout << average << endl;
+
+    // dcomparr output = enc.decode(pt);
+    SecKey sk(N);
+
+    auto start2 = high_resolution_clock::now();
+    PubKey pk(sk.s, (N/2)+1, ql);
+    auto stop2 = high_resolution_clock::now();
+    auto duration2 = duration_cast<microseconds>(stop2 - start2);
+    // cout << "public key: " << duration2.count() << endl;
+
+    auto start3 = high_resolution_clock::now();
+    EvalKey evk(sk.s, (N/2)+1, ql);
+    auto stop3 = high_resolution_clock::now();
+    auto duration3 = duration_cast<microseconds>(stop3 - start3);
+    // cout << "eval key: " << 2*duration2.count() << endl;
+
+    CKKS ckks(evk);
+
+    auto start4 = high_resolution_clock::now();
+    Ciphertext ct = ckks.encrypt(pt, pk);
+    auto stop4 = high_resolution_clock::now();
+    auto duration4 = duration_cast<microseconds>(stop4 - start4);
+    cout << "encrypt: " << duration4.count() << endl;
+
+    // Ciphertext ct1 = ckks.encrypt(pt1, pk);
+
+    // auto start5 = high_resolution_clock::now();
+    // Polynomial ptOut = ckks.decrypt(ct, sk);
+    // auto stop5 = high_resolution_clock::now();
+    // auto duration5 = duration_cast<microseconds>(stop5 - start5);
+    // cout << "decrypt: " << duration5.count() << endl;
+    // ct.c0.printPol();
+
+    // auto start6 = high_resolution_clock::now();
+    // Ciphertext ctadd = ckks.add(ct, ct1);
+    // auto stop6 = high_resolution_clock::now();
+    // auto duration6 = duration_cast<microseconds>(stop6 - start6);
+    // cout << "add: " << duration6.count() << endl;
+
+    // ctadd.c0.printPol();
+
+    // Polynomial ptOut = ckks.decrypt(ctadd, sk);
+    // ptOut.printPol();
+
+
+    // auto start7 = high_resolution_clock::now();
+    // Ciphertext ctmult = ckks.multNTT(ct, ct);
+    Ciphertext ctmultNorm = ckks.mult(ct, ct);
+    // auto stop7 = high_resolution_clock::now();
+    // auto duration7 = duration_cast<microseconds>(stop7 - start7);
+    // cout << "mult: " << duration7.count() << endl;
+
+    Polynomial multOut = ckks.decrypt(ctmultNorm, sk);
+    multOut.printPol();
+
+    dcomparr multOutput = enc.decode(pt);
+    for(int i=0; i<2; i++){
+        cout << multOutput.arr[i] << endl;
+    }
+
+
+    // NTT ntt;
+    // int64_t w = ntt.genNthRoot(ntt.M, N);
+    
+    // auto start7 = high_resolution_clock::now();
+    // vector<int64_t> nttRes = ntt.ntt(pt, N*2, w);
+    // auto stop7 = high_resolution_clock::now();
+    // auto duration7 = duration_cast<microseconds>(stop7 - start7);
+    // cout << "full ntt: " << duration7.count() << endl;
+
+    // auto start8 = high_resolution_clock::now();
+    // Ciphertext ct1 = ckks.multNTT(ct,ct);
+    // auto stop8 = high_resolution_clock::now();
+    // auto duration8 = duration_cast<microseconds>(stop8 - start8);
+    // cout << "mult ntt: " << duration8.count() << endl;
+
+    // auto start9 = high_resolution_clock::now();
+    // Polynomial nttOut = ntt.intt(nttRes1, w);
+    // Ciphertext ctmult = ckks.multNTT(ct, ct);
+    // auto stop9 = high_resolution_clock::now();
+    // auto duration9 = duration_cast<microseconds>(stop9 - start9);
+    // cout << "intt: " << duration9.count() << endl;
+
+    // int q0 = 67;
+    // int pl[6] = {61, 67, 71, 73, 79, 59};
+    // int64_t ql = q0;
+
+    // for (int i=0; i<3; i++){
+    //     ql *= pl[i];
+    // }
+
+    // SecKey sk = SecKey(2);
+    // PubKey pk(sk.s, 3, ql);
+    // EvalKey evk(sk.s, 3, ql);
+
+    // CKKS ckks(4, pk, evk, sk);
+    // Ciphertext ct = ckks.encrypt(pt);
+    // ct.c0.printPol();
+    // ct.c1.printPol();
+
+    // cout << endl;
+    // Polynomial ptout = ckks.decrypt(ct);
+    // ptout.printPol();
+
+    // cout << endl;
+    // Ciphertext ctadd = ckks.add(ct, ct);
+    // ctadd.c0.printPol();
+    // ctadd.c1.printPol();
+
+    // cout << endl;
+    // Polynomial ptadd = ckks.decrypt(ctadd);
+    // ptadd.printPol();
+
+    // cout << endl;
+    // Ciphertext ctmult = ckks.mult(ct, ct);
+    // ctmult.c0.printPol();
+    // ctmult.c1.printPol();
+
+    // cout << endl;
+    // Polynomial ptmult = ckks.decrypt(ctmult);
+    // ptmult.printPol();
+
+    // Encoder enc2(16, 64.0);
+    // dcomparr outputMult = enc2.decode(ptmult);
+    // for(int i=0; i<4; i++){
+    //     cout << outputMult.arr[i] << " ";
+    // }
+    // cout << endl;
+
+    bool error = false;
+    // for(int i=1; i<5; i++){
+    //     cout << i << endl;
+    //     int N = pow(2, i);
+    //     Encoder enc(N*4, 64.0);
+    //     dcomparr in = randomFill(N);
+    //     Polynomial test = enc.encode(in);
+    //     dcomparr out = enc.decode(test);
+    //     for(int j=0; j<N; j++){
+    //         if (abs(in.arr[i]-out.arr[i]) > 2.0){
+    //             error = true;
+    //             break;
+    //         }
+    //     }
+    //     if (error) break;
+    // }
+
+    // for(int i=1; i<5; i++){
+    //     int N = pow(2, i);
+    //     Encoder enc(N*4, 64.0);
+
+    //     SecKey sk = SecKey(N);
+    //     PubKey pk(sk.s, (N)+1, ql);
+    //     EvalKey evk(sk.s, (N)+1, ql);
+    //     CKKS ckks(N, pk, evk, sk);
+
+    //     dcomparr in = randomFill(N);
+        
+    //     Polynomial test = enc.encode(in);
+    //     Ciphertext ct = ckks.encrypt(test);
+    //     Polynomial testOut = ckks.decrypt(ct);
+
+    //     dcomparr out = enc.decode(testOut);
+    //     for(int j=0; j<N; j++){
+    //         if (abs(in.arr[j]-out.arr[j]) > 2.0){
+    //             error = true;
+    //             break;
+    //         }
+    //     }
+    //     if (error) break;
+    // }
+
+    // for(int i=1; i<5; i++){
+    //     int N = pow(2, i);
+    //     Encoder enc(N*4, 64.0);
+
+    //     SecKey sk = SecKey(N);
+    //     PubKey pk(sk.s, (N)+1, ql);
+    //     EvalKey evk(sk.s, (N)+1, ql);
+    //     CKKS ckks(N, pk, evk, sk);
+
+    //     dcomparr in = randomFill(N);
+        
+    //     Polynomial test = enc.encode(in);
+    //     Ciphertext ct = ckks.encrypt(test);
+    //     Ciphertext ctadd = ckks.add(ct,ct);
+    //     Polynomial testOut = ckks.decrypt(ctadd);
+
+    //     dcomparr out = enc.decode(testOut);
+    //     dcomplex scale(2.0, 0.0);
+    //     for(int j=0; j<N; j++){
+    //         if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){
+    //             error = true;
+    //             break;
+    //         }
+    //     }
+    //     if (error) break;
+    // }
+
+    // for(int i=1; i<5; i++){
+    //     int N = pow(2, i);
+    //     Encoder enc(N*4, 64.0);
+
+    //     SecKey sk = SecKey(N);
+    //     PubKey pk(sk.s, (N)+1, ql);
+    //     EvalKey evk(sk.s, (N)+1, ql);
+    //     CKKS ckks(N, pk, evk, sk);
+
+    //     dcomparr in = randomFill(N);
+        
+    //     Polynomial test = enc.encode(in);
+    //     Ciphertext ct = ckks.encrypt(test);
+    //     Ciphertext ctadd = ckks.mult(ct,ct);
+    //     Polynomial testOut = ckks.decrypt(ctadd);
+
+    //     Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0);
+    //     dcomparr out = enc.decode(testOut);
+    //     dcomparr check = enc.decode(control);
+
+    //     for(int j=0; j<N*2; j++){
+    //         if (abs(check.arr[j]-out.arr[j]) > 2.0){
+    //             error = true;
+    //             break;
+    //         }
+    //     }
+    //     if (error) break;
+    // }
+
+    // double input[] = {2.0, 1.0};
+    // Polynomial in(2, input);
+
+    // NTT ntt;
+    // vector<int64_t> out = ntt.ntt(in, 8, 1801542727);
+
+    // for(int i=0; i<8; i++){
+    //     cout << out[i] << " ";
+    // }
+    // cout << endl;
+
+    // Polynomial test = ntt.intt(out, 1801542727);
+    // test.printPol();
+
+    // if(error) {
+    //     cout << "Error" << endl;
+    // } else {
+    //     cout << "Finish" << endl;
+    // }
\ No newline at end of file
diff --git a/cuda/test.cu b/cuda/test.cu
index 02c7f14708a07d30b73bca88a1ad4a20f118cea9..33a9a6e2c98a0492e7f1441212a7a8debbd70d34 100644
--- a/cuda/test.cu
+++ b/cuda/test.cu
@@ -14,171 +14,99 @@ using namespace std;
 
 using namespace std::chrono;
 
-dcomparr randomFill(int n){
-    dcomparr out(n);
-    for(int i=0; i<n; i++){
-        out.arr[i] = 1.0 + 1.0*J;
-    }
-    return out;
-}
 
 int main(){
-    // Encoder enc(128, 128.0);
-    // dcomplex a = 1.0 + 1.0 *J; 
-    // dcomparr input;
-    // for(int i=0; i<32; i++){
-    //     input.arr[i] = a;
-    // }
-
-    // Encoder enc(8, 64.0);
-    // dcomplex a1 = 3.0 + 4.0 *J; 
-    // dcomplex a2 = 2.0 - 1.0*J;
-    // dcomparr input(2);
-    // input.arr[0] = a1;
-    // input.arr[1] = a2;
-
-    // Polynomial pt = enc.encode(input);
-    // pt.printPol();
-
     int q0 = 67;
-    int pl[6] = {61, 67, 71, 73, 79, 59};
+    int pl[6] = {61, 67, 71, 73, 79, 61};
     int64_t ql = q0;
 
-    for (int i=0; i<3; i++){
+    for (int i=0; i<5; i++){
         ql *= pl[i];
     }
 
-    // SecKey sk = SecKey(4);
-    // PubKey pk(sk.s, 4, ql);
-    // EvalKey evk(sk.s, 4, ql);
-
-    // cout << "decode" << endl;
-    // dcomparr out = enc.decode(pt);
-    // for(int i=0; i<2; i++){
-    //     cout << out.arr[i] << " ";
-    // }
-    // cout << endl;
-
-    bool error = false;
-    // for(int i=1; i<5; i++){
-    //     cout << i << endl;
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
-    //     dcomparr in = randomFill(N);
-    //     Polynomial test = enc.encode(in);
-    //     dcomparr out = enc.decode(test);
-    //     for(int j=0; j<N; j++){
-    //         if (abs(in.arr[i]-out.arr[i]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    // for(int i=1; i<5; i++){
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
-
-    //     SecKey sk = SecKey(N);
-    //     PubKey pk(sk.s, (N)+1, ql);
-    //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, pk, evk, sk);
-
-    //     dcomparr in = randomFill(N);
-        
-    //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test);
-    //     Polynomial testOut = ckks.decrypt(ct);
-
-    //     dcomparr out = enc.decode(testOut);
-    //     for(int j=0; j<N; j++){
-    //         if (abs(in.arr[j]-out.arr[j]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    // for(int i=1; i<5; i++){
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
-
-    //     SecKey sk = SecKey(N);
-    //     PubKey pk(sk.s, (N)+1, ql);
-    //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, pk, evk, sk);
-
-    //     dcomparr in = randomFill(N);
-        
-    //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test);
-    //     Ciphertext ctadd = ckks.add(ct,ct);
-    //     Polynomial testOut = ckks.decrypt(ctadd);
-
-    //     dcomparr out = enc.decode(testOut);
-    //     dcomplex scale(2.0, 0.0);
-    //     for(int j=0; j<N; j++){
-    //         if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    // for(int i=1; i<5; i++){
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
-
-    //     SecKey sk = SecKey(N);
-    //     PubKey pk(sk.s, (N)+1, ql);
-    //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, pk, evk, sk);
-
-    //     dcomparr in = randomFill(N);
-        
-    //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test);
-    //     Ciphertext ctadd = ckks.mult(ct,ct);
-    //     Polynomial testOut = ckks.decrypt(ctadd);
-
-    //     Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0);
-    //     dcomparr out = enc.decode(testOut);
-    //     dcomparr check = enc.decode(control);
-
-    //     for(int j=0; j<N*2; j++){
-    //         if (abs(check.arr[j]-out.arr[j]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    double input[] = {2.0, 1.0};
-    Polynomial in(2, input);
-
-    NTT ntt;
-    vector<int64_t> out = ntt.ntt(in, 8, 1801542727);
-
-    for(int i=0; i<8; i++){
-        cout << out[i] << " ";
-    }
+    Encoder enc(8, 64.0);
+    dcomparr input(2);
+    input.arr[0] = (3.0+4.0*J);
+    input.arr[1] = (2.0-1.0*J);
+
+    dcomparr input2(2);
+    input2.arr[0] = (1.0+1.0*J);
+    input2.arr[1] = (1.0+1.0*J);
+
+    cout << "Input: " << endl;
+    cout << "Input 1: ";
+    for(int i=0; i<2; i++) cout << input.arr[i] << " ";
+    cout << endl;
+    cout << "Input 2: ";
+    for(int i=0; i<2; i++) cout << input2.arr[i] << " ";
+    cout << endl;
+    cout << endl;
+    
+
+    cout << "Encoding: " << endl;
+    cout << "Plaintext 1: "; 
+    Polynomial pt1 = enc.encode(input);
+    pt1.printPol();
+    cout << "Plaintext 2: ";
+    Polynomial pt2 = enc.encode(input2);
+    pt2.printPol();
     cout << endl;
 
-    Polynomial test = ntt.intt(out, 1801542727);
-    test.printPol();
+    SecKey sk(2);
+    PubKey pk(sk.s, 3, ql);
+    EvalKey evk(sk.s, 3, ql);
 
-    if(error) {
-        cout << "Error" << endl;
-    } else {
-        cout << "Finish" << endl;
-    }
+    CKKS ckks(evk);
+    Ciphertext ct1 = ckks.encrypt(pt1, pk);
+    Ciphertext ct2 = ckks.encrypt(pt2, pk);
 
-    return 0;
+    cout << "Encryption pt1: " << endl;
+    cout << "c0: "; ct1.c0.printPol();
+    cout << "c1: "; ct1.c1.printPol();
+    cout << endl;
+
+    cout << "Ciphertext Addition : (ct1+ct2) " << endl;
+    Ciphertext ctAdd = ckks.add(ct1, ct2);
+    Polynomial addOut = ckks.decrypt(ctAdd, sk);
+    cout << "Pt Addition: "; addOut.printPol();
+    dcomparr addRes = enc.decode(addOut);
+    cout << "Addition Result: ";
+    for(int i=0; i<2; i++) cout << addRes.arr[i] << " ";
+    cout << endl;
+    cout << endl;
+
+    cout << "Ciphertext Multiplication: (ct1 * ct1) " << endl;
+    Ciphertext ctMult = ckks.mult(ct1, ct1);
+    Polynomial multOut = ckks.decrypt(ctMult, sk);
+    cout << "Pt multiplication: "; multOut.printPol();
+    dcomparr multRes = enc.decode(multOut);
+    cout << "Multiplication Result: ";
+    for(int i=0; i<2; i++) cout << multRes.arr[i] << " ";
+    cout << endl;
+    cout << endl;
+
+    cout << "Ciphertext Multiplication (NTT): (ct1 * ct1)" << endl;
+    Ciphertext ctMultNTT = ckks.multNTT(ct1, ct1);
+    Polynomial multNTTOut = ckks.decrypt(ctMultNTT, sk);
+    cout << "Pt multiplication (NTT): "; multNTTOut.printPol();
+    dcomparr multNTTRes = enc.decode(multNTTOut);
+    cout << "Multiplication Result (NTT): ";
+    for(int i=0; i<2; i++) cout << multNTTRes.arr[i] << " ";
+    cout << endl;
+    cout << endl;
 
+    cout << "Decryption ct1: " << endl;
+    Polynomial decrypted1 = ckks.decrypt(ct1, sk);
+    cout << "Decryption result: "; decrypted1.printPol();
+    cout << endl;
 
+    cout << "Decoding pt1: " << endl;
+    dcomparr output = enc.decode(decrypted1);
+    for(int i=0; i<2; i++){
+        cout << output.arr[i] << " ";
+    }
+    cout << endl;
+     
 
+    return 0;
 }
\ No newline at end of file
diff --git a/cuda/test.o b/cuda/test.o
index c7f0951f33be0ac829b0362f8519d12d225d8c76..941694f57231a98e5068e912fb55178b4b2b9e7f 100644
Binary files a/cuda/test.o and b/cuda/test.o differ
diff --git a/exp.sh b/exp.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6aefbda4dcd16851aeffc971587924a279155f26
--- /dev/null
+++ b/exp.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+./test.sh > ./res/exp1.txt
+./test.sh > ./res/exp2.txt
+./test.sh > ./res/exp3.txt
+./test.sh > ./res/exp4.txt
+./test.sh > ./res/exp5.txt
diff --git a/run b/run
index 56659c8159d83dbcac387195f701f5049ebd2ed3..bef6614c785e44a535b0e678de67abc8e3d502f1 100755
Binary files a/run and b/run differ
diff --git a/src/ckks.cpp b/src/ckks.cpp
index 3714399e92ba451608e83190b3cc44aed63d555d..fb2b43f8df1275fae460c1deccb8ce7d1985ab62 100644
--- a/src/ckks.cpp
+++ b/src/ckks.cpp
@@ -103,7 +103,7 @@ Ciphertext CKKS::encrypt(Polynomial pt, PubKey pk){
 
 Polynomial CKKS::decrypt(Ciphertext ct, SecKey sk){
   Polynomial pt = ct.c0 + (ct.c1 * sk.s);
-  return pt.modCoeff(ql);
+  return pt;
 }
 
 Ciphertext CKKS::add(Ciphertext ct1, Ciphertext ct2){
@@ -145,7 +145,7 @@ int getNextPow(int deg){
 
 Ciphertext CKKS::multNTT(Ciphertext ct1, Ciphertext ct2){
   int deg = getNextPow(ct1.c0.degree+ct2.c0.degree-1);
-  int64_t w = ntt.genNthRoot(ntt.M, deg); //ntt.genNthRoot(ntt.M, deg);
+  int64_t w = 1592366214; //ntt.genNthRoot(ntt.M, deg);
 
   Polynomial d1 = fastMult(ct1.c0, ct2.c0, deg, w).modCoeff(ql);
   Polynomial d2 = (fastMult(ct1.c0, ct2.c1,deg, w) + fastMult(ct2.c0, ct1.c1,deg, w)).modCoeff(ql);  
diff --git a/src/ckks.h b/src/ckks.h
index eae9a6463e18f067b6981b0c592fe15a36288487..a252e7ceb4d2d51b1862490afa9635cef4c51b79 100644
--- a/src/ckks.h
+++ b/src/ckks.h
@@ -25,11 +25,12 @@ class CKKS {
     Ciphertext mult(Ciphertext ct1, Ciphertext ct2);
     Ciphertext multNTT(Ciphertext ct1, Ciphertext ct2);
     Ciphertext add(Ciphertext ct1, Ciphertext ct2);
+    Polynomial fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w);
     
   private:
     Polynomial genE(int degree, double var);
     Polynomial genZO();
-    Polynomial fastMult(Polynomial p1, Polynomial p2, int deg, int64_t w);
+    
     
 };
 
diff --git a/src/encoder.cpp b/src/encoder.cpp
index 5b26c755451ebe4fa16a216f20ddce1355f38d3e..ee612476e3f45330c1dc199a072bd0f40c96e62b 100644
--- a/src/encoder.cpp
+++ b/src/encoder.cpp
@@ -7,6 +7,8 @@
 
 #include <chrono>
 
+int counter = 0;
+
 
 using namespace std;
 using namespace std::chrono;
@@ -16,6 +18,8 @@ dcomplex vdot(vector<dcomplex> a, vector<dcomplex> b, int size, int offset){
   for(int i=0; i<size; i++){
     out += b[i+offset] * conj(a[i]);
   }
+  // counter++;
+  // cout << counter << endl;
   return out;
 }
 
@@ -38,10 +42,24 @@ Encoder::Encoder(int in, double inScale){
   M = in;
   scale = inScale;
   root = exp((2*M_PI/M) * J);
-  vandermonde = vector<dcomplex>((M/2)*(M/2));
-  sigmaRBasis = vector<dcomplex>((M/2)*(M/2));
-  initVandermonde(root);
-  VandermondeTranspose();
+  if(M/4 <= pow(2,12)){
+    vandermonde = Eigen::MatrixXcd((M/2),(M/2));
+    sigmaRBasis = Eigen::MatrixXcd((M/2),(M/2));
+    initVandermonde(root);
+    VandermondeTranspose();
+  } else {
+    sigmaBase = Eigen::VectorXcd(M/2);
+    vanderBase = Eigen::VectorXcd(M/2);
+
+    for(int j=0; j<M/2; j++){
+      int power = (2*j+1);
+      sigmaBase(j) = pow(root, power);
+    }
+
+    for(int j=0; j<M/2; j++){
+      vanderBase(j) = pow(root, j);
+    }
+  }
 }
 
 void Encoder::initVandermonde(dcomplex xi){
@@ -49,7 +67,7 @@ void Encoder::initVandermonde(dcomplex xi){
   for(int i=0; i<N; i++){
     for(int j=0; j<N; j++){
       int power = (2*i+1) * j;
-      vandermonde[i*N+j] = pow(xi, power);
+      vandermonde(i,j) = pow(xi, power);
     }
   }
 }
@@ -58,7 +76,7 @@ void Encoder::VandermondeTranspose(){
   int N = M/2;
   for(int i=0; i<N; i++){
     for(int j=0; j<N; j++){
-      sigmaRBasis[j*N+i] = vandermonde[i*N+j];
+      sigmaRBasis(j,i) = vandermonde(i,j);
     }
   }
 }
@@ -81,20 +99,20 @@ dcomparr Encoder::sigma(Polynomial pol){
     dcomparr out(pol.degree/2);
     for(int i=0; i<N; i++){
       for (int j=0; j<M/2; j++){
-        out.arr[i] += pol.coeffs[j] * vandermonde[i*M/2+j];
+        out.arr[i] += pol.coeffs[j] * vandermonde(i, j);
       }
     }
 
     return out;
   }else{
     int R = 2;
+    
     pol.reduceDeg();
     
     while (R < pol.degree){
       R <<=1;
     }
-
-    dcomparr out(R/2);
+    dcomparr out(M/4);
     vector<dcomplex> tempVandermonde = vector<dcomplex>(R*R);
     
     for(int i=0; i<R; i++){
@@ -105,7 +123,7 @@ dcomparr Encoder::sigma(Polynomial pol){
       }
     }
 
-    for(int i=0; i<R/2; i++){
+    for(int i=0; i<N; i++){
       for (int j=0; j<R; j++){
         if(j<pol.degree){
           out.arr[i] += pol.coeffs[j] * tempVandermonde[i*R+j];
@@ -118,21 +136,66 @@ dcomparr Encoder::sigma(Polynomial pol){
   
 }
 
+dcomparr Encoder::largeSigma(Polynomial pol){
+  int N = M/4;
+  pol.reduceDeg();
+
+  int R = 2;
+  while (R < pol.degree){
+    R <<=1;
+  }
+
+  dcomparr out(R/2);
+  for(int i=0; i<R/2; i++){
+    for (int j=0; j<R; j++){
+      if(j<pol.degree){
+        out.arr[i] += pol.coeffs[j] * pow(vanderBase(j), (2*i+1));
+      } 
+    }
+  }
+
+  return out;  
+}
+
 dcomparr Encoder::decode(Polynomial pol){
   auto start = high_resolution_clock::now();
   Polynomial unscaled = pol.scaleCoeff(1.0/scale);
-  dcomparr out = sigma(unscaled);
+
+  if(M/4<=pow(2, 12)){
+    dcomparr out = sigma(unscaled);
+    return out;
+  } else {
+    dcomparr out = largeSigma(unscaled);
+    return out;
+  }
+}
+
+dcomparr Encoder::computeCoordinate(dcomparr z){
+  dcomparr out(M/2);
+  Eigen::VectorXcd temp = Eigen::Map<Eigen::VectorXcd, Eigen::Unaligned>(z.arr.data(), z.arr.size());
+  for (int i=0; i<M/2; i++){
+    Eigen::VectorXcd b = sigmaRBasis.row(i);
+    dcomplex zi = temp.dot(b) / b.dot(b);
+    out.arr[i] = real(zi);
+  }
   
-  auto stop = high_resolution_clock::now();
-  auto duration = duration_cast<microseconds>(stop - start);
-  //cout << "Decoding: " << duration.count() << endl;
   return out;
 }
 
-dcomparr Encoder::computeCoordinate(dcomparr z){
+Eigen::VectorXcd genB(int i, int N, Eigen::VectorXcd base){
+  Eigen::VectorXcd b(N);
+  for(int j=0; j<N; j++){
+    b(j) = pow(base(j), i);
+  }
+  return b;
+}
+
+dcomparr Encoder::computeLargeCoordinate(dcomparr z){
   dcomparr out(M/2);
+  Eigen::VectorXcd temp = Eigen::Map<Eigen::VectorXcd, Eigen::Unaligned>(z.arr.data(), z.arr.size());
   for (int i=0; i<M/2; i++){
-    dcomplex zi = vdot(z.arr, sigmaRBasis, M/2, i*M/2) / vdots(sigmaRBasis, sigmaRBasis, M/2, i*M/2);
+    Eigen::VectorXcd b = genB(i, M/2, sigmaBase);
+    dcomplex zi = temp.dot(b) / b.dot(b);
     out.arr[i] = real(zi);
   }
   
@@ -155,9 +218,15 @@ dcomparr Encoder::coordinateWRR(dcomparr coordinates){
 }
 
 dcomparr Encoder::discretization(dcomparr z){
-  dcomparr coor = computeCoordinate(z);      
-  dcomparr roundedCoor = coordinateWRR(coor);
-  return roundedCoor;
+  if(M/4 <= pow(2,12)){
+    dcomparr coor = computeCoordinate(z);  
+    dcomparr roundedCoor = coordinateWRR(coor);
+    return roundedCoor;
+  } else {
+    dcomparr coor = computeLargeCoordinate(z);
+    dcomparr roundedCoor = coordinateWRR(coor);
+    return roundedCoor;
+  }
 }
 
 Polynomial Encoder::sigmaInv(dcomparr z){
@@ -184,8 +253,6 @@ Polynomial Encoder::encode(dcomparr input){
   dcomparr roundZ = discretization(zPi);
   Polynomial out = sigmaInv(roundZ);
 
-  // Polynomial out(2);
-
   auto stop = high_resolution_clock::now();
   auto duration = duration_cast<microseconds>(stop - start);
   //cout << "Encoding: " << duration.count() << endl;
diff --git a/src/encoder.h b/src/encoder.h
index 269cd9527b4b5f595ed7de440fd533e6878065fa..7a573e18cb0aaa49bdb7777d93065439f2d3b5ef 100644
--- a/src/encoder.h
+++ b/src/encoder.h
@@ -22,27 +22,32 @@ struct dcomparr {
 #define ENCODER_H
 
 class Encoder {
+  public:
+    Encoder(int in, double inScale);
+    Polynomial encode(dcomparr input);
+    dcomparr decode(Polynomial pol);
+
   private: 
     int M;
     double scale;
     dcomplex root;
-    vector<dcomplex> vandermonde;
-    vector<dcomplex> sigmaRBasis;
+    Eigen::VectorXcd vanderBase;
+    Eigen::VectorXcd sigmaBase;
+    Eigen::MatrixXcd vandermonde;
+    Eigen::MatrixXcd sigmaRBasis;
 
     void initVandermonde(dcomplex xi);
     void VandermondeTranspose();
 
     dcomparr piInv(dcomparr input);
     dcomparr sigma(Polynomial pol);
+    dcomparr largeSigma(Polynomial pol);
     dcomparr computeCoordinate(dcomparr z);
+    dcomparr computeLargeCoordinate(dcomparr z);
     dcomparr coordinateWRR(dcomparr coordinates);
     dcomparr discretization(dcomparr z);
     Polynomial sigmaInv(dcomparr z);
-  
-  public:
-    Encoder(int in, double inScale);
-    Polynomial encode(dcomparr input);
-    dcomparr decode(Polynomial pol);
+
 };
 
 
diff --git a/src/pubkey.cpp b/src/pubkey.cpp
index 0b526951e93ca1befa25eae13f65899a29127e9f..5e2dd8c1c07c3b37bb4a322cf6ab6a22809d7783 100644
--- a/src/pubkey.cpp
+++ b/src/pubkey.cpp
@@ -44,7 +44,7 @@ void PubKey::generateA(int degree, int64_t q){
 
 void PubKey::computeB(int q, Polynomial s){
     Polynomial err = genE(a.degree, 1.0);
-    Polynomial temp = (a.scaleCoeff(-1.0)) * s ;
+    Polynomial temp = (a.scaleCoeff(-1.0)) * s;
     Polynomial res = temp.modCoeff(q);
 
     b = Polynomial(res.degree, res.coeffs);
diff --git a/test.cpp b/test.cpp
index e6e808199de421791a563d3070ff94ed4de663dd..593a0c439f8d5c2f91db2135a0514b69a23b0047 100644
--- a/test.cpp
+++ b/test.cpp
@@ -19,205 +19,45 @@ dcomparr randomFill(int n){
     return out;
 }
 
-int main(){
-    // Encoder enc(8, 64.0);
-    // dcomplex a1 = 3.0 + 4.0 *J; 
-    // dcomplex a2 = 2.0 - 1.0*J;
-    // dcomparr input(2);
-    // input.arr[0] = a1;
-    // input.arr[1] = a2;
-
-    // // dcomparr input(128);
-    // // dcomplex a = 1.0 + 1.0*J;
-    // // for(int i=0; i<128; i++){
-    // //     input.arr[i] = a;
-    // // }
-
-    // Polynomial pt = enc.encode(input);
-    // pt.printPol();
 
+int main(){
     int q0 = 67;
     int pl[6] = {61, 67, 71, 73, 79, 61};
     int64_t ql = q0;
 
-    for (int i=0; i<3; i++){
+    for (int i=0; i<5; i++){
         ql *= pl[i];
     }
 
-    // SecKey sk = SecKey(2);
-    // // sk.s.printPol();
-    // PubKey pk(sk.s, 3, ql);
-    // // cout << "pk: ";
-    // // pk.b.printPol();
-    // EvalKey evk(sk.s, 3, ql);
-    // cout << "evk: ";
-    // evk.b.printPol();
+    int N = pow(2,15);
+    int scale = pow(2,10);
+    Encoder enc(N*4, scale);
+    dcomparr input = randomFill(N);
     
-    // cout << "Control: ";
-    // Polynomial pt2 = pt * pt;
-    // pt2.scaleRoundCoeff(1.0/61.0).printPol();
-
-    // CKKS ckks(4, pk, evk, sk);
-    // Ciphertext ct = ckks.encrypt(pt);
-
-    // auto start = high_resolution_clock::now();
-    // Ciphertext ctadd = ckks.multNTT(ct, ct);
-    // auto stop = high_resolution_clock::now();
-    // auto duration = duration_cast<microseconds>(stop - start);
-    //cout << "Normal Time: " << duration.count() << endl;
-
-    // cout << "Experiment: ";
-    // Polynomial ptOut = ckks.decrypt(ctadd);
-    // ptOut.printPol();
-
-    bool error = false;
-    // for(int i=1; i<5; i++){
-    //     cout << i << endl;
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
-    //     dcomparr in = randomFill(N);
-    //     Polynomial test = enc.encode(in);
-    //     dcomparr out = enc.decode(test);
-    //     for(int j=0; j<N; j++){
-    //         if (abs(in.arr[i]-out.arr[i]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    // for(int i=1; i<5; i++){
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
-
-    //     SecKey sk = SecKey(N);
-    //     PubKey pk(sk.s, (N)+1, ql);
-    //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, evk);
-
-    //     dcomparr in = randomFill(N);
-        
-    //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test, pk);
-    //     Polynomial testOut = ckks.decrypt(ct, sk);
-
-    //     dcomparr out = enc.decode(testOut);
-    //     for(int j=0; j<N; j++){
-    //         if (abs(in.arr[j]-out.arr[j]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    for(int i=7; i<8; i++){
-        int N = pow(2, i);
-        Encoder enc(N*4, 64.0);
-
-        SecKey sk = SecKey(N);
-        PubKey pk(sk.s, (N)+1, ql);
-        EvalKey evk(sk.s, (N)+1, ql);
-        CKKS ckks(N, evk);
+    Polynomial pt1 = enc.encode(input);
 
-        dcomparr in = randomFill(N);
-        
-        Polynomial test = enc.encode(in);
-        Ciphertext ct = ckks.encrypt(test, pk);
-        Ciphertext ctadd = ckks.add(ct,ct);
-        Polynomial testOut = ckks.decrypt(ctadd, sk);
+    SecKey sk(N/2);
+    PubKey pk(sk.s, N/2, ql);
+    EvalKey evk(sk.s, N/2, ql);
 
-        dcomparr out = enc.decode(testOut);
-        dcomplex scale(2.0, 0.0);
-        for(int j=0; j<N; j++){
-            if (abs((scale*in.arr[j])-out.arr[j]) > 2.0){
-                error = true;
-                break;
-            }
-        }
-        if (error) break;
-    }
-
-    // for(int i=7; i<9; i++){
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
-
-    //     SecKey sk = SecKey(N);
-    //     PubKey pk(sk.s, (N)+1, ql);
-    //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, pk, evk, sk);
-
-    //     dcomparr in = randomFill(N);
-        
-    //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test);
-    //     Ciphertext ctadd = ckks.mult(ct,ct);
-    //     Polynomial testOut = ckks.decrypt(ctadd);
-
-    //     Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0);
-    //     dcomparr out = enc.decode(testOut);
-    //     dcomparr check = enc.decode(control);
-
-    //     for(int j=0; j<N*2; j++){
-    //         if (abs(check.arr[j]-out.arr[j]) > 2.0){
-    //             ctadd.c0.printPol();
-    //             ctadd.c1.printPol();
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    // for(int i=7; i<8; i++){
-    //     int N = pow(2, i);
-    //     Encoder enc(N*4, 64.0);
+    CKKS ckks(4, evk);
+    Ciphertext ct1 = ckks.encrypt(pt1, pk);
 
-    //     SecKey sk = SecKey(N);
-    //     PubKey pk(sk.s, (N)+1, ql);
-    //     EvalKey evk(sk.s, (N)+1, ql);
-    //     CKKS ckks(N, evk);
-
-    //     dcomparr in = randomFill(N);
-        
-    //     Polynomial test = enc.encode(in);
-    //     Ciphertext ct = ckks.encrypt(test, pk);
-    //     Ciphertext ctadd = ckks.multNTT(ct,ct);
-    //     Polynomial testOut = ckks.decrypt(ctadd, sk);
-
-    //     Polynomial control = (test * test).scaleRoundCoeff(1.0/64.0);
-    //     dcomparr out = enc.decode(testOut);
-    //     dcomparr check = enc.decode(control);
-        
-    //     for(int j=0; j<N; j++){
-    //         if (abs(check.arr[j]-out.arr[j]) > 2.0){
-    //             error = true;
-    //             break;
-    //         }
-    //     }
-    //     if (error) break;
-    // }
-
-    // int N = pow(2, 3);
-    // Encoder enc(N*4, 64.0);
-    // SecKey sk = SecKey(N);
-    // PubKey pk(sk.s, (N)+1, ql);
-    // EvalKey evk(sk.s, (N)+1, ql);
-    // CKKS ckks(N, pk, evk, sk);
-    // dcomparr in = randomFill(N);
-    
-    // Polynomial test = enc.encode(in);
-    // Ciphertext ct = ckks.encrypt(test);
-    // Ciphertext ctadd = ckks.multNTT(ct,ct);
-    // Polynomial testOut = ckks.decrypt(ctadd);
-    
-
-    if(error) {
-        cout << "Error" << endl;
-    } else {
-        cout << "Finish" << endl;
-    }
+    NTT ntt;
+    int64_t w = ntt.genNthRoot(ntt.M, N*2);
     
+    auto start = high_resolution_clock::now();
+    vector<int64_t> nttOut = ntt.ntt(pt1, N*2, w);
+    auto stop = high_resolution_clock::now();
+    auto duration = duration_cast<microseconds>(stop - start);
+    cout << "NTT Time: " << duration.count() << endl;
+
+    auto start1 = high_resolution_clock::now();
+    Polynomial output = ntt.intt(nttOut, w);
+    auto stop1 = high_resolution_clock::now();
+    auto duration1 = duration_cast<microseconds>(stop1 - start1);
+    cout << "iNTT Time: " << duration1.count() << endl;
+
+     
     return 0;
 }
\ No newline at end of file
diff --git a/test.o b/test.o
index 7c653c930a6ede35e85d127617ac2ebc128c68ef..f8f4c7b994b082a708dbebdc56488d4c99b4d478 100644
Binary files a/test.o and b/test.o differ