From 113a65166b8027fb0b5cf02df4636b9ccf72b017 Mon Sep 17 00:00:00 2001
From: Haziq Abiyyu Mahdy <haziq.a.mahdy@gmail.com>
Date: Fri, 10 Jan 2025 15:05:34 +0700
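Subject: [PATCH] fix: cast TotalCharges (string) to float in preprocess

TotalCharges is converted with to_numeric(errors='coerce'), so values
that cannot be parsed as numbers become NaN and are then replaced with 0
via fillna(0).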

---
 airflow/dags/functions/preprocess_training_data.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/airflow/dags/functions/preprocess_training_data.py b/airflow/dags/functions/preprocess_training_data.py
index 382905e..6cf827b 100644
--- a/airflow/dags/functions/preprocess_training_data.py
+++ b/airflow/dags/functions/preprocess_training_data.py
@@ -59,6 +59,8 @@ def preprocess(input_path, bucket_name, output_object_key):
     for col_name in categorical_col:
         df[col_name] = df[col_name].astype("category").cat.codes
 
+    df['TotalCharges'] = ps.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)
+
     print("=============================================================")
     print("Done preprocessing")    
     print(df.head())
-- 
GitLab