diff --git a/airflow/dags/functions/preprocess_training_data.py b/airflow/dags/functions/preprocess_training_data.py index 382905e28d67591bc4de26bef6fbfd7047a127b8..6cf827b0639c5b214d5fe37b2b6712362c1108a7 100644 --- a/airflow/dags/functions/preprocess_training_data.py +++ b/airflow/dags/functions/preprocess_training_data.py @@ -59,6 +59,8 @@ def preprocess(input_path, bucket_name, output_object_key): for col_name in categorical_col: df[col_name] = df[col_name].astype("category").cat.codes + df['TotalCharges'] = ps.to_numeric(df['TotalCharges'], errors='coerce').fillna(0) + print("=============================================================") print("Done preprocessing") print(df.head())