From 171623c145ff9bf7422f643d97d74bcddaf7078b Mon Sep 17 00:00:00 2001
From: afnanramadhan <13521011@std.stei.itb.ac.id>
Date: Wed, 8 Jan 2025 04:59:35 +0700
Subject: [PATCH] feat: test containerize Apache

---
Review notes (text between the three-dash marker above and the first
"diff --git" line is commentary; it is not applied to the tree):

  * Purpose: replace the venv-based CI job with "build" and "run" stages
    that call `docker compose build` and `docker compose up -d`, and add a
    Dockerfile, airflow.env and docker-compose.yml that define Spark
    master/worker, Postgres, and the Airflow webserver/scheduler services.
  * airflow.env: Airflow reads environment overrides only when they are
    named AIRFLOW__{SECTION}__{KEY} (double underscore on both sides of
    the section, none inside the key). The three misspelled names
    (LOAD__EXAMPLES, AIRFLOW_WEBSERVER_BASE_URL,
    AIRFLOW_WEBSERVER_SECRET_KEY) were silently ignored, so example DAGs
    were still loaded and the configured secret key was never used. They
    are now spelled AIRFLOW__CORE__LOAD_EXAMPLES,
    AIRFLOW__WEBSERVER__BASE_URL and AIRFLOW__WEBSERVER__SECRET_KEY.
  * Dockerfile: ENV now uses the key=value form instead of the legacy
    space-separated form; the value itself is unchanged.
  * Line counts per file are unchanged, so the hunk headers and the
    diffstat below still match. The "index" blob ids of airflow.env and
    Dockerfile were left as generated and no longer describe the edited
    content; regenerate the patch if a 3-way apply is needed.
  * NOTE(review): the secret key value "password" and the admin/admin
    account look like placeholders for a local test stack - confirm they
    are replaced before this is used anywhere shared.
  * NOTE(review): YAML indentation was reconstructed with two spaces per
    level - confirm against the original commit.

 .gitlab-ci.yml     | 27 ++++++++++++-------
 Dockerfile         | 17 ++++++++++++
 airflow.env        |  5 ++++
 docker-compose.yml | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt   |  2 ++
 5 files changed, 108 insertions(+), 9 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 airflow.env
 create mode 100644 docker-compose.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 56f235f..d42835a 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,14 +1,23 @@
-before_script:
-  - if [ ! -d "venv" ]; then python3 -m venv .venv; fi
-  - source .venv/bin/activate
-  - pip install -r requirements.txt
+stages:
+  - build
+  - run
 
-test_job:
-  stage: test
+build:
+  stage: build
   tags:
     - macos
   only:
-    - main
+    - test
   script:
-    - echo "Running tests"
-    - .venv/bin/python test.py
+    - echo "Build Docker Container"
+    - docker compose build
+
+run:
+  stage: run
+  tags:
+    - macos
+  only:
+    - test
+  script:
+    - echo "Run Docker Container"
+    - docker compose up -d
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..484cbe9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM apache/airflow:2.7.1-python3.11
+
+USER root
+
+RUN apt-get update && \
+    apt-get install -y gcc python3-dev openjdk-11-jdk && \
+    apt-get clean
+
+ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-arm64
+
+USER airflow
+
+RUN pip install \
+    apache-airflow \
+    apache-airflow-providers-apache-spark \
+    'apache-airflow-providers-openlineage>=1.8.0' \
+    pyspark
\ No newline at end of file
diff --git a/airflow.env b/airflow.env
new file mode 100644
index 0000000..329e1ee
--- /dev/null
+++ b/airflow.env
@@ -0,0 +1,5 @@
+AIRFLOW__CORE__LOAD_EXAMPLES=FALSE
+AIRFLOW__CORE__EXECUTOR=LocalExecutor
+AIRFLOW__WEBSERVER__BASE_URL=http://localhost:8080
+AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
+AIRFLOW__WEBSERVER__SECRET_KEY=password
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..af4f61c
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,66 @@
+version: '3'
+
+x-spark-common: &spark-common
+  image: bitnami/spark:latest
+  volumes:
+    - ./jobs:/opt/bitnami/spark/jobs
+  networks:
+    - airflow
+
+x-airflow-common: &airflow-common
+  build:
+    context: .
+    dockerfile: Dockerfile
+  env_file:
+    - airflow.env
+  volumes:
+    - ./jobs:/opt/airflow/jobs
+    - ./dags:/opt/airflow/dags
+    - ./logs:/opt/airflow/logs
+  depends_on:
+    - postgres
+  networks:
+    - airflow
+
+services:
+  spark-master:
+    <<: *spark-common
+    command: bin/spark-class org.apache.spark.deploy.master.Master
+    ports:
+      - "9090:8080"
+      - "7077:7077"
+
+  spark-worker:
+    <<: *spark-common
+    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
+    depends_on:
+      - spark-master
+    environment:
+      SPARK_MODE: worker
+      SPARK_WORKER_CORES: 2
+      SPARK_WORKER_MEMORY: 1g
+      SPARK_MASTER_URL: spark://spark-master:7077
+
+  postgres:
+    image: postgres:14.0
+    environment:
+      - POSTGRES_USER=airflow
+      - POSTGRES_PASSWORD=airflow
+      - POSTGRES_DB=airflow
+    networks:
+      - airflow
+
+  webserver:
+    <<: *airflow-common
+    command: bash -c "airflow db init && airflow webserver"
+    ports:
+      - "8080:8080"
+    depends_on:
+      - scheduler
+
+  scheduler:
+    <<: *airflow-common
+    command: bash -c "airflow db init && airflow db migrate && airflow users create --username admin --firstname Admin --lastname Admin --role Admin --email admin@example.com --password admin && airflow scheduler"
+
+networks:
+  airflow:
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index e214769..3955dc9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
 pyspark
+apache-airflow
+apache-airflow-providers-apache-spark
-- 
GitLab