diff --git a/01-nbody.cu b/01-nbody.cu
index cdbee3aeb0fd9cb3c28d9e06f9250c4601643c3a..71d8054b53e20631f5df44690a13b6da96d39486 100644
--- a/01-nbody.cu
+++ b/01-nbody.cu
@@ -29,22 +29,27 @@ void randomizeBodies(float *data, int n) {
  * on all others, but does not update their positions.
  */
 
-void bodyForce(Body *p, float dt, int n) {
-  for (int i = 0; i < n; ++i) {
-    float Fx = 0.0f; float Fy = 0.0f; float Fz = 0.0f;
-
-    for (int j = 0; j < n; j++) {
-      float dx = p[j].x - p[i].x;
-      float dy = p[j].y - p[i].y;
-      float dz = p[j].z - p[i].z;
+__global__ void bodyForce(Body *p, float dt, int N)  // kernel: each thread adds dt * (summed pull from all N bodies) to one body's velocity
+{  // positions (x, y, z) are only read here; only vx, vy, vz are written
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;  // flat global index built from the x dimensions only
+
+  if (tid < N) {  // threads whose index is past the last body do no work
+    float Fx = 0, Fy = 0, Fz = 0;  // per-thread accumulators for the three force components
+    for (int i = 0; i < N; i++) {  // visits every body, i == tid included
+      float dx = p[i].x - p[tid].x;  // offset from this thread's body to body i
+      float dy = p[i].y - p[tid].y;
+      float dz = p[i].z - p[tid].z;
       float distSqr = dx*dx + dy*dy + dz*dz + SOFTENING;
       float invDist = rsqrtf(distSqr);
       float invDist3 = invDist * invDist * invDist;
-
-      Fx += dx * invDist3; Fy += dy * invDist3; Fz += dz * invDist3;
+      Fx += dx * invDist3;  // offset scaled by invDist^3, accumulated per axis
+      Fy += dy * invDist3;
+      Fz += dz * invDist3;
     }
 
-    p[i].vx += dt*Fx; p[i].vy += dt*Fy; p[i].vz += dt*Fz;
+    p[tid].vx += dt*Fx;  // each thread updates only its own body's velocity fields
+    p[tid].vy += dt*Fy;
+    p[tid].vz += dt*Fz;
   }
 }
 
@@ -54,7 +59,6 @@ int main(const int argc, const char** argv) {
    * Do not change the value for `nBodies` here. If you would like to modify it,
    * pass values into the command line.
    */
-
   int nBodies = 2<<11;
   int salt = 0;
   if (argc > 1) nBodies = 2<<atoi(argv[1]);
@@ -62,7 +66,6 @@ int main(const int argc, const char** argv) {
   /*
    * This salt is for assessment reasons. Tampering with it will result in automatic failure.
    */
-
   if (argc > 2) salt = atoi(argv[2]);
 
   const float dt = 0.01f; // time step
@@ -71,7 +74,7 @@ int main(const int argc, const char** argv) {
   int bytes = nBodies * sizeof(Body);
   float *buf;
 
-  buf = (float *)malloc(bytes);
+  if (cudaMallocManaged(&buf, bytes) != cudaSuccess) return 1; // stop early: buf is not a usable pointer if the managed allocation failed
 
   Body *p = (Body*)buf;
 
@@ -99,19 +102,25 @@ int main(const int argc, const char** argv) {
    * as well as the work to integrate the positions.
    */
 
-    bodyForce(p, dt, nBodies); // compute interbody forces
+    int threads_per_block = 128;
+    int number_of_blocks = (nBodies + threads_per_block - 1) / threads_per_block; // round up: plain division gives 0 blocks (an invalid launch) when nBodies < 128
+    bodyForce <<< number_of_blocks, threads_per_block >>> ( p, dt, nBodies ); // surplus threads in the last block are masked by the kernel's `tid < N` guard
+
+    // Wait for the kernel to finish: the host loop below reads the velocities it wrote
+    cudaDeviceSynchronize();
 
-  /*
-   * This position integration cannot occur until this round of `bodyForce` has completed.
-   * Also, the next round of `bodyForce` cannot begin until the integration is complete.
-   */
-
-    for (int i = 0 ; i < nBodies; i++) { // integrate position
+    //  Integrate position
+    for (int i = 0 ; i < nBodies; i++) {
       p[i].x += p[i].vx*dt;
       p[i].y += p[i].vy*dt;
       p[i].z += p[i].vz*dt;
     }
 
+  /*
+   * This position integration cannot occur until this round of `bodyForce` has completed.
+   * Also, the next round of `bodyForce` cannot begin until the integration is complete.
+   */
+
   /*******************************************************************/
   // Do not modify the code in this section.
     const double tElapsed = GetTimer() / 1000.0;
@@ -134,5 +143,5 @@ int main(const int argc, const char** argv) {
    * Feel free to modify code below.
    */
 
-  free(buf);
+  cudaFree(buf);
 }
diff --git a/README.md b/README.md
index 71daa2f838e2189d35ce7c72365e9800d54cc4c6..b301371d7dc822ccfd519538ca2ea8fe60c64a98 100644
--- a/README.md
+++ b/README.md
@@ -6,3 +6,4 @@ Using asynhcronous data transfer from host to device, execution can overlap. The
 
 ## Task 2
 
+ 
\ No newline at end of file
diff --git a/image_nbody_11.png b/image_nbody_11.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3f2645d240b8dd654258a551101f038c72d5c73
Binary files /dev/null and b/image_nbody_11.png differ
diff --git a/image_nbody_13.png b/image_nbody_13.png
new file mode 100644
index 0000000000000000000000000000000000000000..124f27ab23b51526dbe3aaf02045d4fdd38bd75a
Binary files /dev/null and b/image_nbody_13.png differ