Skip to content
Snippets Groups Projects
Commit da3113d9 authored by Azmi Zahrani's avatar Azmi Zahrani
Browse files

Merge branch 'develop' into 'main'

fix: raw data

See merge request !11
parents f3b1a837 b413ff0d
1 merge request!11fix: raw data
Pipeline #66425 failed with stages
in 12 seconds
File deleted
File deleted
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -31,16 +31,18 @@ schema = StructType([
StructField("Churn", StringType(), True)
])
raw_data_path = 'dataset/raw_data'
if os.path.isdir(raw_data_path):
csv_files = [f for f in os.listdir(raw_data_path) if f.endswith('.csv')]
if csv_files:
first_csv_file = os.path.join(raw_data_path, csv_files[0])
df = spark.read.csv(first_csv_file, header=True, schema=schema)
else:
raw_data_path = 'dataset/raw_data'
fallback_csv_path = 'dataset/telco_customer_churn.csv'
if os.path.isdir(raw_data_path):
csv_files = [f for f in os.listdir(raw_data_path) if f.endswith('.csv')]
if csv_files:
first_csv_file = os.path.join(raw_data_path, csv_files[0])
df = spark.read.csv(first_csv_file, header=True, schema=schema)
else:
raise FileNotFoundError("No CSV files found in the specified directory.")
else:
df = spark.read.csv(raw_data_path, header=True, schema=schema)
else:
df = spark.read.csv(fallback_csv_path, header=True, schema=schema)
print("Data dimensions from raw data: ", df.count(), len(df.columns))
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment