diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7f97813ba50d882cb047183c2a68f5614af13d74..dea653607d6ff7034c164d6f192a8ad4f31da691 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -55,31 +55,23 @@ deploy_dag:
 # Test the DAG execution
 test_dag:
   stage: test
-  image:
-    name: apache/airflow:2.7.1-python3.11
+  image: python:3.11
   dependencies:
     - deploy_dag
-  services:
-    - name: postgres:13
-      alias: postgres
-  variables:
-    POSTGRES_USER: airflow
-    POSTGRES_PASSWORD: airflow
-    POSTGRES_DB: airflow
-    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
-    AIRFLOW__CORE__LOAD_EXAMPLES: "False"
-    AIRFLOW__CORE__EXECUTOR: "LocalExecutor"
-  before_script:
-    - mkdir -p $AIRFLOW_HOME/dags
-    - mkdir -p $AIRFLOW_HOME/logs
-    - mkdir -p $AIRFLOW_HOME/plugins
-    - cp $AIRFLOW_DAGS_DIR/model_dag.py $AIRFLOW_HOME/dags/
-    - pip install apache-airflow-providers-apache-spark==2.1.1 pyspark==3.5.0 apache-airflow-providers-openlineage>=1.8.0 pandas mlflow scikit-learn
   script:
-    - airflow db init
-    - nohup airflow scheduler > /dev/null 2>&1 &
-    - nohup airflow webserver -p 8080 > /dev/null 2>&1 &
-    - sleep 20
-    - airflow dags trigger --wait model_training_and_tracking
-    - airflow tasks state model_training_and_tracking train_and_log_model
-    - airflow tasks logs model_training_and_tracking train_and_log_model
\ No newline at end of file
+    # Activate virtual environment
+    - python -m venv venv
+    - source venv/bin/activate
+    - pip install --upgrade pip
+    - pip install apache-airflow[celery,postgres,s3]==2.7.1
+
+    # Validate Airflow DAGs
+    - export AIRFLOW_HOME=$AIRFLOW_HOME
+    - airflow dags list
+
+    - airflow dags test model_dag 2025-01-01
+
+  artifacts:
+    when: always
+    paths:
+      - $AIRFLOW_DAGS_DIR
\ No newline at end of file
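
For readability, here is the `test_dag` job as it reads after the change, assembled from the `+` side of the hunk above. This is only a reconstruction for reference, assuming no other hunk touches this job and that `$AIRFLOW_HOME` and `$AIRFLOW_DAGS_DIR` are defined elsewhere in the pipeline (they are referenced here but not set in this hunk). Compared with the old job, it drops the Postgres service and the scheduler/webserver startup in favor of a lighter parse-and-test validation with `airflow dags list` and `airflow dags test`.

```yaml
# Test the DAG execution
test_dag:
  stage: test
  image: python:3.11
  dependencies:
    - deploy_dag
  script:
    # Activate virtual environment
    - python -m venv venv
    - source venv/bin/activate
    - pip install --upgrade pip
    - pip install apache-airflow[celery,postgres,s3]==2.7.1

    # Validate Airflow DAGs
    - export AIRFLOW_HOME=$AIRFLOW_HOME
    - airflow dags list

    - airflow dags test model_dag 2025-01-01

  artifacts:
    when: always
    paths:
      - $AIRFLOW_DAGS_DIR
```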