From da070f5da606c0b07d681dbb3123233346f55515 Mon Sep 17 00:00:00 2001
From: Kurniandha Sukma Yunastrian <13516106@std.stei.itb.ac.id>
Date: Tue, 28 Apr 2020 23:50:16 +0700
Subject: [PATCH] edit string similarity algorithm

---
 app/Http/Controllers/AutograderController.php | 51 ++++++++++++++++---
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/app/Http/Controllers/AutograderController.php b/app/Http/Controllers/AutograderController.php
index 0a0bfaa..67993bd 100644
--- a/app/Http/Controllers/AutograderController.php
+++ b/app/Http/Controllers/AutograderController.php
@@ -147,23 +147,60 @@ class AutograderController extends Controller
                 }
             }
 
-            $results[] = AutograderController::jaccardIndex($key, $answer);
+            $results[] = AutograderController::cosine($key, $answer);
         }
 
         return $results;
     }
 
     /**
-     * Get Jaccard Index score
+     * Get the cosine similarity score of two token arrays
      *
      * @return score
      */
-    public function jaccardIndex($key, $answer) {
-        $arr_intersection = count(array_intersect( $key, $answer ));
-        $arr_union = count(array_merge( $key, $answer )) - $arr_intersection;
-        $jaccard_index = $arr_intersection / $arr_union;
+    public function cosine($key, $answer) {
+        // Cosine similarity of the term-frequency vectors of two token
+        // arrays: dot(v1, v2) / (|v1| * |v2|).
+        //
+        // The score is symmetric in $key and $answer and lies between 0
+        // (no shared token) and 1 (identical token proportions). It is 0.0
+        // when either array is empty.
+
+        // Term-frequency maps (token => occurrences). Tokens are matched
+        // exactly, in linear time; array_count_values() expects every
+        // token to be a string or an integer.
+        $freq1 = array_count_values($key);
+        $freq2 = array_count_values($answer);
+
+        // Dot product: only tokens present in both inputs contribute.
+        $dot_product = 0;
+        foreach ($freq1 as $token => $count) {
+            if (isset($freq2[$token])) {
+                $dot_product += $count * $freq2[$token];
+            }
+        }
+
+        // Squared Euclidean length of each frequency vector.
+        $length1 = 0;
+        foreach ($freq1 as $count) {
+            $length1 += $count * $count;
+        }
+
+        $length2 = 0;
+        foreach ($freq2 as $count) {
+            $length2 += $count * $count;
+        }
 
-        return $jaccard_index;
+        // An empty $key or $answer yields a zero-length vector, for which
+        // the cosine is undefined. Score it 0.0 instead of dividing by
+        // zero (a warning and NAN on PHP 7, DivisionByZeroError on PHP 8).
+        if ($length1 === 0 || $length2 === 0) {
+            return 0.0;
+        }
+
+        $similarity = $dot_product / sqrt($length1 * $length2);
+
+        return $similarity;
     }
 
     /**
-- 
GitLab