From 113a65166b8027fb0b5cf02df4636b9ccf72b017 Mon Sep 17 00:00:00 2001
From: Haziq Abiyyu Mahdy <haziq.a.mahdy@gmail.com>
Date: Fri, 10 Jan 2025 15:05:34 +0700
Subject: [PATCH] fix: cast TotalCharges (string) to float in preprocess

---
 airflow/dags/functions/preprocess_training_data.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/airflow/dags/functions/preprocess_training_data.py b/airflow/dags/functions/preprocess_training_data.py
index 382905e..6cf827b 100644
--- a/airflow/dags/functions/preprocess_training_data.py
+++ b/airflow/dags/functions/preprocess_training_data.py
@@ -59,6 +59,8 @@ def preprocess(input_path, bucket_name, output_object_key):
     for col_name in categorical_col:
         df[col_name] = df[col_name].astype("category").cat.codes
 
+    df['TotalCharges'] = ps.to_numeric(df['TotalCharges'], errors='coerce').fillna(0)
+
     print("=============================================================")
     print("Done preprocessing")
     print(df.head())
-- 
GitLab