From 171623c145ff9bf7422f643d97d74bcddaf7078b Mon Sep 17 00:00:00 2001
From: afnanramadhan <13521011@std.stei.itb.ac.id>
Date: Wed, 8 Jan 2025 04:59:35 +0700
Subject: [PATCH] feat: containerize Apache Airflow and Spark for testing

---
 .gitlab-ci.yml     | 27 ++++++++++++-------
 Dockerfile         | 17 ++++++++++++
 airflow.env        |  5 ++++
 docker-compose.yml | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt   |  2 ++
 5 files changed, 108 insertions(+), 9 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 airflow.env
 create mode 100644 docker-compose.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 56f235f..d42835a 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,14 +1,23 @@
-before_script:
-  - if [ ! -d "venv" ]; then python3 -m venv .venv; fi
-  - source .venv/bin/activate
-  - pip install -r requirements.txt
+stages:  # executed in the order listed: build first, then run
+  - build
+  - run
 
-test_job:
-  stage: test
+build:  # builds the images declared in docker-compose.yml
+  stage: build
   tags:
     - macos
   only:
-    - main
+    - test
   script:
-    - echo "Running tests"
-    - .venv/bin/python test.py
+    - echo "Build Docker Container"
+    - docker compose build
+
+run:  # starts the compose services in the background (detached)
+  stage: run
+  tags:
+    - macos
+  only:
+    - test
+  script:
+    - echo "Run Docker Container"
+    - docker compose up -d
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..484cbe9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM apache/airflow:2.7.1-python3.11
+# System packages (compiler, Python headers, JDK) have to be installed as root.
+USER root
+RUN apt-get update && \
+    apt-get install -y gcc python3-dev openjdk-11-jdk && \
+    ln -sfn "/usr/lib/jvm/java-11-openjdk-$(dpkg --print-architecture)" /usr/lib/jvm/java-11-openjdk && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+# Points at the arch-neutral symlink created above, so it is valid on amd64 and arm64.
+ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk
+
+# Back to the unprivileged user; Airflow is pinned so pip keeps the base image's version.
+USER airflow
+RUN pip install --no-cache-dir \
+    "apache-airflow==${AIRFLOW_VERSION}" \
+    apache-airflow-providers-apache-spark \
+    'apache-airflow-providers-openlineage>=1.8.0' \
+    pyspark
diff --git a/airflow.env b/airflow.env
new file mode 100644
index 0000000..329e1ee
--- /dev/null
+++ b/airflow.env
@@ -0,0 +1,5 @@
+AIRFLOW__CORE__LOAD_EXAMPLES=FALSE
+AIRFLOW__CORE__EXECUTOR=LocalExecutor
+AIRFLOW__WEBSERVER__BASE_URL=http://localhost:8080
+AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
+AIRFLOW__WEBSERVER__SECRET_KEY=password
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..af4f61c
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,66 @@
+version: '3'
+
+x-spark-common: &spark-common  # shared settings merged into both Spark services
+  image: bitnami/spark:latest
+  volumes:
+    - ./jobs:/opt/bitnami/spark/jobs
+  networks:
+    - airflow
+
+x-airflow-common: &airflow-common  # shared settings merged into webserver and scheduler
+  build:
+    context: .
+    dockerfile: Dockerfile
+  env_file:
+    - airflow.env
+  volumes:
+    - ./jobs:/opt/airflow/jobs
+    - ./dags:/opt/airflow/dags
+    - ./logs:/opt/airflow/logs
+  depends_on:
+    - postgres
+  networks:
+    - airflow
+
+services:
+  spark-master:
+    <<: *spark-common
+    command: bin/spark-class org.apache.spark.deploy.master.Master
+    ports:
+      - "9090:8080"  # host 9090 -> container 8080; host 8080 is published by webserver
+      - "7077:7077"
+
+  spark-worker:
+    <<: *spark-common
+    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
+    depends_on:
+      - spark-master
+    environment:
+      SPARK_MODE: worker
+      SPARK_WORKER_CORES: '2'  # quoted: environment values are strings
+      SPARK_WORKER_MEMORY: 1g
+      SPARK_MASTER_URL: spark://spark-master:7077
+
+  postgres:  # host name and credentials must match SQL_ALCHEMY_CONN in airflow.env
+    image: postgres:14.0
+    environment:
+      - POSTGRES_USER=airflow
+      - POSTGRES_PASSWORD=airflow
+      - POSTGRES_DB=airflow
+    networks:
+      - airflow
+
+  webserver:  # waits for the scheduler's DB setup rather than racing it with a second init
+    <<: *airflow-common
+    command: bash -c "airflow db check-migrations --migration-wait-timeout 300 && airflow webserver"
+    ports:
+      - "8080:8080"
+    depends_on:  # replaces (does not extend) the merged depends_on; scheduler depends on postgres
+      - scheduler
+
+  scheduler:  # the only service that initialises the metadata DB and creates the admin user
+    <<: *airflow-common
+    command: bash -c "airflow db init && airflow db migrate && airflow users create --username admin --firstname Admin --lastname Admin --role Admin --email admin@example.com --password admin && airflow scheduler"
+
+networks:
+  airflow:
diff --git a/requirements.txt b/requirements.txt
index e214769..3955dc9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
 pyspark
+apache-airflow  # also pip-installed by the Dockerfile added in this patch
+apache-airflow-providers-apache-spark  # also pip-installed by the Dockerfile added in this patch
-- 
GitLab