Skip to content
Snippets Groups Projects
Commit e40d249f authored by Naufal-Nalendra
Browse files

feat: create basic ML model

parent 4e8349a4
No related merge requests found
model/mlruns
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
import mlflow.sklearn
# Train a soft-voting ensemble (logistic regression + random forest +
# gradient boosting) on the churn dataset and record the parameters, metrics
# and fitted model in an MLflow run.

# Single source of truth for the hyperparameters: the same constants are
# passed to the estimators and logged to MLflow, so the logged values cannot
# drift from what was actually trained.
DATA_PATH = '../data/churn.csv'  # relative to the current working directory
TARGET_COLUMN = 'Churn'
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 100
MAX_ITER = 1000
VOTING = 'soft'

# Load dataset
# TODO clean dataset
# NOTE(review): the features are passed to the estimators unmodified. If the
# CSV contains non-numeric or missing feature values, fitting will presumably
# fail until the cleaning step above is implemented -- confirm against the data.
data = pd.read_csv(DATA_PATH)

# Split into features and target
X = data.drop(columns=[TARGET_COLUMN])
y = data[TARGET_COLUMN].map({'Yes': 1, 'No': 0})

# Series.map() turns every label outside the mapping (including missing
# values) into NaN. Fail here with an explicit message instead of letting the
# NaN reach the estimators.
unmapped = y.isna()
if unmapped.any():
    unexpected_labels = sorted(data.loc[unmapped, TARGET_COLUMN].astype(str).unique())
    raise ValueError(
        f"Column '{TARGET_COLUMN}' contains values other than 'Yes'/'No': "
        f"{unexpected_labels}"
    )

# Split into train and test sets. Stratifying on the target keeps the
# churn / no-churn ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=y,
)

# Define individual models
logistic_model = LogisticRegression(max_iter=MAX_ITER, random_state=RANDOM_STATE)
random_forest_model = RandomForestClassifier(
    n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE
)
gradient_boosting_model = GradientBoostingClassifier(
    n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE
)

# Combine the three base models into one voting classifier.
ensemble_model = VotingClassifier(
    estimators=[
        ('logistic', logistic_model),
        ('random_forest', random_forest_model),
        ('gradient_boosting', gradient_boosting_model),
    ],
    voting=VOTING,
)

# Start MLflow run
with mlflow.start_run():
    # Train the model
    ensemble_model.fit(X_train, y_train)

    # Make predictions
    y_pred = ensemble_model.predict(X_test)

    # Evaluate model performance (positive class is 1, i.e. 'Yes')
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Log parameters
    mlflow.log_param("n_estimators", N_ESTIMATORS)
    mlflow.log_param("random_state", RANDOM_STATE)
    mlflow.log_param("max_iter", MAX_ITER)
    mlflow.log_param("test_size", TEST_SIZE)
    mlflow.log_param("voting", VOTING)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)

    # Log the model
    # NOTE(review): the artifact path says "random_forest_model" although the
    # logged object is the voting ensemble. The name is kept unchanged because
    # existing consumers may load the model by this path -- rename once that
    # has been checked.
    mlflow.sklearn.log_model(ensemble_model, "random_forest_model")

print("Model training and logging complete.")
print(f"Metrics - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment