diff --git a/hello.c b/hello.c
deleted file mode 100644
index 9dcb9de966916d3909d04cb4142dc4d3af748f09..0000000000000000000000000000000000000000
--- a/hello.c
+++ /dev/null
@@ -1,8 +0,0 @@
-__global__ void cuda_hello(){
-    printf("Hello World from GPU!\n");
-}
-
-int main() {
-    cuda_hello<<<1,1>>>(); 
-    return 0;
-}
\ No newline at end of file
diff --git a/hello.cu b/hello.cu
index 9dcb9de966916d3909d04cb4142dc4d3af748f09..9c7f494ab9181711e9ae3d4b1cf71eba94e1fd6d 100644
--- a/hello.cu
+++ b/hello.cu
@@ -1,8 +1,38 @@
-__global__ void cuda_hello(){
-    printf("Hello World from GPU!\n");
+#include <stdio.h>
+#include <stdlib.h>
+const int N = 16;         // number of elements in each of the two arrays
+const int blocksize = N;  // one thread per element: the kernel indexes the arrays by threadIdx.x
+ 
+// Adds b[i] into a[i] for i = threadIdx.x (meant for a single 1-D block); threads with i >= N do nothing.
+__global__ void hello(char *a, int *b)
+{
+	if (threadIdx.x < N) a[threadIdx.x] += b[threadIdx.x];
 }
-
-int main() {
-    cuda_hello<<<1,1>>>(); 
-    return 0;
+ 
+// Prints "Hello ", has the GPU add b[] to a[] in place, then prints the modified a[].
+int main()
+{
+	char a[N] = "Hello \0\0\0\0\0\0";
+	int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+	char *ad = NULL;	// device copy of a
+	int *bd = NULL;	// device copy of b
+	const int csize = N*sizeof(char);
+	const int isize = N*sizeof(int);
+
+	printf("%s", a);
+
+	// Each step runs only if everything before it succeeded; err keeps the first failure.
+	cudaError_t err = cudaMalloc( (void**)&ad, csize );
+	if (err == cudaSuccess) err = cudaMalloc( (void**)&bd, isize );
+	if (err == cudaSuccess) err = cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice );
+	if (err == cudaSuccess) err = cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice );
+	// Single block of blocksize threads; a bad launch is only reported by cudaGetLastError().
+	if (err == cudaSuccess) { hello<<<dim3(1, 1), dim3(blocksize, 1)>>>(ad, bd); err = cudaGetLastError(); }
+	// Blocking copy back to the host; kernel execution errors are reported here as well.
+	if (err == cudaSuccess) err = cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost );
+	cudaFree( ad );	// freeing a still-NULL pointer is a harmless no-op
+	cudaFree( bd );
+	if (err != cudaSuccess) { fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err)); return EXIT_FAILURE; }
+	printf("%s\n", a);
+	return EXIT_SUCCESS;
 }
\ No newline at end of file