Add a Dockerised Airflow + Spark stack and build/run it from GitLab CI.

Files in this patch:
  .gitlab-ci.yml      replaces the venv-based test job with "build" and "run"
                      stages that call docker compose
  Dockerfile          apache/airflow:2.7.1-python3.11 plus a C toolchain, a JDK
                      and the packages from requirements.txt
  airflow.env         Airflow settings loaded through env_file by the
                      webserver and scheduler services
  docker-compose.yml  Spark master/worker, Postgres, Airflow webserver/scheduler
  requirements.txt    adds apache-airflow and the Spark/OpenLineage providers
  dags/.keep, jobs/python/.keep   empty placeholder files

Review notes:
  * airflow.env: option names follow AIRFLOW__{SECTION}__{KEY}; the
    LOAD_EXAMPLES, BASE_URL and SECRET_KEY entries were misspelled and are
    corrected here.
  * Dockerfile: JAVA_HOME no longer hard-codes the arm64 JDK directory, the
    apt layer skips recommended packages and removes the package lists, and
    the pip step pins Airflow core to the base image's version.
  * The "index" header is omitted for Dockerfile and airflow.env because their
    content was revised and the earlier blob ids no longer describe it.
  * NOTE(review): YAML indentation in the hunks below was rebuilt from the
    document structure; confirm it against the working tree before applying.

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 56f235f7b66e9dcc4a5d1ce9cb07062bcdeddb47..c34599d36a4ff68547981aa7d202e15540a97f92 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,14 +1,25 @@
-before_script:
-  - if [ ! -d "venv" ]; then python3 -m venv .venv; fi
-  - source .venv/bin/activate
-  - pip install -r requirements.txt
+stages:
+  - build
+  - run
 
-test_job:
-  stage: test
+build:
+  stage: build
   tags:
     - macos
   only:
     - main
+    - develop
   script:
-    - echo "Running tests"
-    - .venv/bin/python test.py
+    - echo "Build Docker Container"
+    - docker compose build
+
+run:
+  stage: run
+  tags:
+    - macos
+  only:
+    - main
+    - develop
+  script:
+    - echo "Run Docker Container"
+    - docker compose up -d
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,30 @@
+FROM apache/airflow:2.7.1-python3.11
+
+# Package installation needs root; the image drops back to "airflow" below.
+USER root
+
+# Compiler toolchain for building Python wheels, plus the JDK that pyspark
+# needs at runtime. libc6-dev is listed explicitly because recommended
+# packages are no longer pulled in. The JDK directory is suffixed with the
+# Debian architecture, so an architecture-neutral symlink is created for
+# JAVA_HOME instead of hard-coding arm64.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        gcc \
+        libc6-dev \
+        openjdk-11-jdk \
+        python3-dev && \
+    ln -s "/usr/lib/jvm/java-11-openjdk-$(dpkg --print-architecture)" \
+        /usr/lib/jvm/java-11-openjdk && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk
+
+USER airflow
+
+COPY requirements.txt /app/requirements.txt
+
+# Pin Airflow core to the version shipped in the base image so that resolving
+# the provider packages cannot swap it for a different release.
+RUN pip install --no-cache-dir "apache-airflow==${AIRFLOW_VERSION}" -r /app/requirements.txt
\ No newline at end of file
diff --git a/airflow.env b/airflow.env
new file mode 100644
--- /dev/null
+++ b/airflow.env
@@ -0,0 +1,8 @@
+# Airflow reads configuration overrides named AIRFLOW__{SECTION}__{KEY}
+# (double underscores on both sides of the section name).
+AIRFLOW__CORE__LOAD_EXAMPLES=FALSE
+AIRFLOW__CORE__EXECUTOR=LocalExecutor
+AIRFLOW__WEBSERVER__BASE_URL=http://localhost:8080
+AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
+# Placeholder value: replace with a random secret outside local development.
+AIRFLOW__WEBSERVER__SECRET_KEY=password
\ No newline at end of file
diff --git a/dags/.keep b/dags/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..af4f61c26161b77796b8c484931b1f30bc80d291
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,66 @@
+version: '3'
+
+x-spark-common: &spark-common
+  image: bitnami/spark:latest
+  volumes:
+    - ./jobs:/opt/bitnami/spark/jobs
+  networks:
+    - airflow
+
+x-airflow-common: &airflow-common
+  build:
+    context: .
+    dockerfile: Dockerfile
+  env_file:
+    - airflow.env
+  volumes:
+    - ./jobs:/opt/airflow/jobs
+    - ./dags:/opt/airflow/dags
+    - ./logs:/opt/airflow/logs
+  depends_on:
+    - postgres
+  networks:
+    - airflow
+
+services:
+  spark-master:
+    <<: *spark-common
+    command: bin/spark-class org.apache.spark.deploy.master.Master
+    ports:
+      - "9090:8080"
+      - "7077:7077"
+
+  spark-worker:
+    <<: *spark-common
+    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
+    depends_on:
+      - spark-master
+    environment:
+      SPARK_MODE: worker
+      SPARK_WORKER_CORES: 2
+      SPARK_WORKER_MEMORY: 1g
+      SPARK_MASTER_URL: spark://spark-master:7077
+
+  postgres:
+    image: postgres:14.0
+    environment:
+      - POSTGRES_USER=airflow
+      - POSTGRES_PASSWORD=airflow
+      - POSTGRES_DB=airflow
+    networks:
+      - airflow
+
+  webserver:
+    <<: *airflow-common
+    command: bash -c "airflow db init && airflow webserver"
+    ports:
+      - "8080:8080"
+    depends_on:
+      - scheduler
+
+  scheduler:
+    <<: *airflow-common
+    command: bash -c "airflow db init && airflow db migrate && airflow users create --username admin --firstname Admin --lastname Admin --role Admin --email admin@example.com --password admin && airflow scheduler"
+
+networks:
+  airflow:
\ No newline at end of file
diff --git a/jobs/python/.keep b/jobs/python/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/requirements.txt b/requirements.txt
index e214769a654c0d5b900d76a14e8910929d9dc52f..5d8e452e9ba0ee25c3df85b48a2360a8ca6427e9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,4 @@
 pyspark
+apache-airflow
+apache-airflow-providers-apache-spark
+apache-airflow-providers-openlineage>=1.8.0
\ No newline at end of file