Commit 61b8f78c authored by rayhanp1402's avatar rayhanp1402
Merge commit with parents a60bcda9 and faa9a281
Pipeline #66495 canceled with stages
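This merge replaces the previous single-stage CI configuration, in which one train job installed numpy, pandas, scikit-learn, and mlflow, started a local MLflow server backed by sqlite, and ran model/train_model.py on the main branch only. The new configuration is a three-stage pipeline (setup, deploy, test) that prepares an Airflow environment, deploys a DAG, and runs it end to end against a Postgres-backed Airflow instance.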
stages:
  - setup
  - deploy
  - test

variables:
  # Define variables
  AIRFLOW_HOME: /opt/airflow
  AIRFLOW_DAGS_DIR: /opt/airflow/dags
  AIRFLOW_DATA_DIR: /opt/airflow/data
  DATA_PATH: "data/churn.csv"
  MLFLOW_TRACKING_URI: http://mlflow:5000  # assumes an mlflow host is reachable from the jobs
  MLFLOW_EXPERIMENT_NAME: "Customer Churn Model"

# Setup Airflow environment
setup_airflow:
  stage: setup
  image: python:3.11
  script:
    # Install dependencies
    - python -m venv venv
    - source venv/bin/activate
    - pip install --upgrade pip
    - pip install apache-airflow[celery,postgres,s3]==2.7.1 mlflow pandas scikit-learn
    # Initialize Airflow directories
    - mkdir -p $AIRFLOW_DATA_DIR
    # Copy the data to the Airflow data directory
    - cp $DATA_PATH $AIRFLOW_DATA_DIR
  cache:
    key: "$CI_COMMIT_REF_SLUG"
    paths:
      # Note: GitLab only caches paths inside the project directory, so this
      # absolute path will not actually be cached between jobs
      - $AIRFLOW_DATA_DIR
# Deploy DAGs
deploy_dag:
  stage: deploy
  image: python:3.11
  dependencies:
    - setup_airflow
  script:
    # Deploy DAG to Airflow
    - mkdir -p $AIRFLOW_HOME/dags $AIRFLOW_HOME/logs $AIRFLOW_HOME/plugins
    - cp dags/model_dag.py $AIRFLOW_DAGS_DIR
    # List deployed DAGs for verification
    - ls -l $AIRFLOW_DAGS_DIR
  artifacts:
    paths:
      # Note: like cache paths, artifact paths must live inside the project
      # directory; /opt/airflow/dags will not be collected or passed on
      - $AIRFLOW_DAGS_DIR
# Test the DAG execution
test_dag:
  stage: test
  image: python:3.11
  dependencies:
    - deploy_dag
  services:
    - name: postgres:13
      alias: postgres
  variables:
    POSTGRES_USER: airflow
    POSTGRES_PASSWORD: airflow
    POSTGRES_DB: airflow
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CORE__LOAD_EXAMPLES: "False"
    AIRFLOW__CORE__EXECUTOR: "LocalExecutor"
  before_script:
    # Set up the Airflow environment
    - python -m venv venv
    - source venv/bin/activate
    - pip install --upgrade pip
    - pip install apache-airflow[postgres]==2.7.1 apache-airflow-providers-apache-spark==2.1.1
    # The >= specifier must be quoted, or the shell treats it as a redirection
    - pip install pyspark==3.5.0 "apache-airflow-providers-openlineage>=1.8.0" pandas mlflow scikit-learn
  script:
    # Artifacts cannot restore files outside the project directory, so place
    # the DAG and data where Airflow expects them before starting
    - mkdir -p $AIRFLOW_DAGS_DIR $AIRFLOW_DATA_DIR
    - cp dags/model_dag.py $AIRFLOW_DAGS_DIR
    - cp $DATA_PATH $AIRFLOW_DATA_DIR
    - airflow db init
    - airflow scheduler &
    - airflow webserver -p 8080 &
    - sleep 20
    # Trigger the DAG
    - airflow dags trigger model_training_and_tracking
    # Wait for the DAG run to complete: poll plain-format output for queued or running runs
    - |
      while airflow dags list-runs -d model_training_and_tracking --state queued -o plain | grep -q model_training_and_tracking ||
            airflow dags list-runs -d model_training_and_tracking --state running -o plain | grep -q model_training_and_tracking; do
        echo "Waiting for DAG run to complete..."
        sleep 10
      done
    # Check the task state for the most recent run (the CLI needs a run id or execution date)
    - |
      RUN_ID=$(airflow dags list-runs -d model_training_and_tracking -o plain | grep model_training_and_tracking | head -n 1 | awk '{print $2}')
      airflow tasks state model_training_and_tracking train_and_log_model "$RUN_ID"
    # Fetch logs for the task by reading the log files; the Airflow CLI has no
    # "tasks logs" subcommand, and this glob follows the default log layout
    - cat $AIRFLOW_HOME/logs/dag_id=model_training_and_tracking/*/task_id=train_and_log_model/*.log
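The test job triggers a DAG with id model_training_and_tracking that contains a task train_and_log_model, but dags/model_dag.py itself is not part of this commit. The following is a minimal sketch of what that file could look like, assuming the task trains a scikit-learn classifier on the churn CSV and logs it to the configured MLflow experiment; the Churn label column, the model choice, and the hyperparameters are illustrative assumptions, not taken from the repository.

# Hypothetical sketch of dags/model_dag.py. Only the DAG id, task id, data path,
# and MLflow settings come from the CI config above; everything else is assumed.
from datetime import datetime

import mlflow
import mlflow.sklearn
import pandas as pd
from airflow.decorators import dag, task
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


@dag(
    dag_id="model_training_and_tracking",  # matches "airflow dags trigger" in test_dag
    start_date=datetime(2024, 1, 1),
    schedule=None,  # run only when triggered, as the CI job does
    catchup=False,
)
def model_training_and_tracking():
    @task(task_id="train_and_log_model")  # matches "airflow tasks state" in test_dag
    def train_and_log_model():
        mlflow.set_tracking_uri("http://mlflow:5000")  # MLFLOW_TRACKING_URI
        mlflow.set_experiment("Customer Churn Model")  # MLFLOW_EXPERIMENT_NAME

        df = pd.read_csv("/opt/airflow/data/churn.csv")  # AIRFLOW_DATA_DIR + churn.csv
        # Assumed layout: a "Churn" label column, all other columns numeric features
        X = df.drop(columns=["Churn"])
        y = df["Churn"]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        with mlflow.start_run():
            model = RandomForestClassifier(n_estimators=100, random_state=42)
            model.fit(X_train, y_train)
            accuracy = accuracy_score(y_test, model.predict(X_test))
            mlflow.log_param("n_estimators", 100)
            mlflow.log_metric("accuracy", accuracy)
            mlflow.sklearn.log_model(model, "model")

    train_and_log_model()


model_training_and_tracking()

A DAG written this way can also be smoke-tested locally, without a scheduler or webserver, via: airflow dags test model_training_and_tracking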