From a49dedba5250d852c4cfa027ccee976c089e85c8 Mon Sep 17 00:00:00 2001
From: agshaathalla <13521027@mahasiswa.itb.ac.id>
Date: Fri, 10 Jan 2025 16:01:52 +0700
Subject: [PATCH] data drift

---
 dags/data_drift.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 dags/data_drift.py

diff --git a/dags/data_drift.py b/dags/data_drift.py
new file mode 100644
index 0000000..ca149d0
--- /dev/null
+++ b/dags/data_drift.py
@@ -0,0 +1,25 @@
+import pandas as pd  
+  
+def sample_data(input_csv, output_csv, sample_fraction, random_state=None):
+    """Write a random row sample of ``input_csv`` to ``output_csv``.
+
+    ``sample_fraction`` and ``random_state`` are passed to ``DataFrame.sample``.
+    """
+    data = pd.read_csv(input_csv)
+    print("Data sebelum sampling:")
+    data.info()  # info() prints its own report and returns None
+    print(data.head())
+    sampled_data = data.sample(frac=sample_fraction, random_state=random_state)
+    print("\nData setelah sampling:")
+    sampled_data.info()  # wrapping this in print() would emit a stray "None"
+    print(sampled_data.head())
+    sampled_data.to_csv(output_csv, index=False)  # index=False: no index column
+    print(f"\nHasil sampling disimpan ke {output_csv}")
+  
+if __name__ == "__main__":
+    # Script entry point: sample 20% of the rows with a fixed seed (42).
+    sample_data(
+        input_csv="telco_customer_churn.csv",
+        output_csv="telco_customer_churn_drift.csv",
+        sample_fraction=0.2,
+        random_state=42)
-- 
GitLab