Skip to content
Snippets Groups Projects
Commit 171623c1 authored by Afnan Ramadhan
Browse files

feat: test containerize Apache

parent b0aee424
2 merge requests: !10 "chore: add model version control and finally can trigger the sparking flow...", !1 "containerize-Apache"
Pipeline #66330 passed with stages
in 13 seconds
before_script: stages:
- if [ ! -d "venv" ]; then python3 -m venv .venv; fi - build
- source .venv/bin/activate - run
- pip install -r requirements.txt
test_job: build:
stage: test stage: build
tags: tags:
- macos - macos
only: only:
- main - test
script: script:
- echo "Running tests" - echo "Build Docker Container"
- .venv/bin/python test.py - docker compose build
# Job "run": starts the Compose stack in detached mode.
# Restricted to the `test` ref and to runners tagged `macos`.
run:
  stage: run
  tags:
    - macos
  only:
    - test
  script:
    # Log a marker line, then bring the services up in the background.
    - echo "Run Docker Container"
    - docker compose up -d
\ No newline at end of file
# Airflow base image extended with a C toolchain, a Java 11 JDK and the
# Python packages needed to submit Spark jobs from DAGs.
FROM apache/airflow:2.7.1-python3.11

# OS packages have to be installed as root.
USER root
# - Remove the apt package lists after installing so they do not end up in the
#   image layer.
# - Create an architecture-neutral symlink to the JDK: Debian installs it under
#   java-11-openjdk-<arch> (e.g. -arm64, -amd64), so a hard-coded arm64 path
#   would be wrong when the image is built on another architecture.
RUN apt-get update && \
    apt-get install -y gcc python3-dev openjdk-11-jdk && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    ln -sfn "/usr/lib/jvm/java-11-openjdk-$(dpkg --print-architecture)" \
        /usr/lib/jvm/java-11-openjdk
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk

# Python packages are installed as the unprivileged airflow user.
USER airflow
# Pin apache-airflow to the version shipped in the base image (AIRFLOW_VERSION
# is expected to be exported by the official image - confirm) so that resolving
# the provider packages cannot silently upgrade or downgrade Airflow itself.
RUN pip install --no-cache-dir \
    "apache-airflow==${AIRFLOW_VERSION}" \
    apache-airflow-providers-apache-spark \
    'apache-airflow-providers-openlineage>=1.8.0' \
    pyspark
\ No newline at end of file
# Airflow settings passed to the Airflow containers as environment variables.
# Airflow only reads variables of the form AIRFLOW__{SECTION}__{KEY}
# (double underscores around the section name, single underscores in the key).

# Do not load the bundled example DAGs.
AIRFLOW__CORE__LOAD_EXAMPLES=FALSE
AIRFLOW__CORE__EXECUTOR=LocalExecutor
AIRFLOW__WEBSERVER__BASE_URL=http://localhost:8080
# Metadata database; host "postgres" with user/password/db "airflow".
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
# NOTE(review): placeholder value - replace with a long random secret and keep
# it out of version control before using this outside local development.
AIRFLOW__WEBSERVER__SECRET_KEY=password
\ No newline at end of file
# Compose stack: a Spark master/worker pair, a Postgres database and the
# Airflow webserver + scheduler, all attached to the "airflow" network.
# NOTE: the long-form depends_on (with `condition`) used below requires
# Docker Compose V2 (`docker compose`).
version: '3'

# Settings shared by the Spark master and worker services.
x-spark-common: &spark-common
  # NOTE(review): `latest` is unpinned; consider pinning a specific Spark tag.
  image: bitnami/spark:latest
  volumes:
    - ./jobs:/opt/bitnami/spark/jobs
  networks:
    - airflow

# Settings shared by the Airflow webserver and scheduler services.
x-airflow-common: &airflow-common
  build:
    context: .
    dockerfile: Dockerfile
  env_file:
    - airflow.env
  volumes:
    - ./jobs:/opt/airflow/jobs
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
  depends_on:
    # Wait until Postgres actually accepts connections, not merely until its
    # container has started; otherwise `airflow db init` can fail on startup.
    postgres:
      condition: service_healthy
  networks:
    - airflow

services:
  spark-master:
    <<: *spark-common
    command: bin/spark-class org.apache.spark.deploy.master.Master
    ports:
      # Host port 9090 maps to container port 8080; host 8080 is published
      # by the Airflow webserver below.
      - "9090:8080"
      - "7077:7077"

  spark-worker:
    <<: *spark-common
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
    depends_on:
      - spark-master
    environment:
      # Values are quoted so they are passed to the container as strings.
      SPARK_MODE: worker
      SPARK_WORKER_CORES: "2"
      SPARK_WORKER_MEMORY: 1g
      SPARK_MASTER_URL: spark://spark-master:7077

  postgres:
    image: postgres:14.0
    environment:
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
    # Readiness probe consumed by the `service_healthy` condition above.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U airflow -d airflow"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      - airflow

  webserver:
    <<: *airflow-common
    command: bash -c "airflow db init && airflow webserver"
    ports:
      - "8080:8080"
    # This key replaces (does not extend) the depends_on merged from
    # x-airflow-common; the scheduler in turn waits for a healthy Postgres.
    depends_on:
      scheduler:
        condition: service_started

  scheduler:
    <<: *airflow-common
    # Initialises/migrates the metadata DB and creates the admin user before
    # starting the scheduler process.
    command: bash -c "airflow db init && airflow db migrate && airflow users create --username admin --firstname Admin --lastname Admin --role Admin --email admin@example.com --password admin && airflow scheduler"

networks:
  airflow:
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment