diff --git a/dags/data_drift.py b/dags/data_drift.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca149d05d9815cd2b203ae2fcb7687a487b4a1d2
--- /dev/null
+++ b/dags/data_drift.py
@@ -0,0 +1,25 @@
+import pandas as pd  
+  
+def sample_data(input_csv, output_csv, sample_fraction, random_state=None):
+    """Save a random sample of the rows in ``input_csv`` to ``output_csv``.
+
+    ``sample_fraction`` and ``random_state`` go to ``DataFrame.sample`` (frac, seed).
+    """
+    data = pd.read_csv(input_csv)
+    print("Data sebelum sampling:")
+    data.info()  # info() prints its own report and returns None; don't print() it
+    print(data.head())
+    sampled_data = data.sample(frac=sample_fraction, random_state=random_state)
+    print("\nData setelah sampling:")
+    sampled_data.info()
+    print(sampled_data.head())
+    sampled_data.to_csv(output_csv, index=False)
+    print(f"\nHasil sampling disimpan ke {output_csv}")
+  
+if __name__ == "__main__":
+    # Script entry point: write a seeded 20% sample of the churn CSV.
+    sample_data(
+        input_csv="telco_customer_churn.csv",
+        output_csv="telco_customer_churn_drift.csv",
+        sample_fraction=0.2, random_state=42,
+    )