Skip to content
Snippets Groups Projects
Commit 33db810a authored by Afnan  Ramadhan's avatar Afnan Ramadhan
Browse files

Merge branch 'fix/fixing-docker' into 'develop'

fix/fixing-docker

See merge request !3
parents a9458ed8 de53aa79
Branches
2 merge requests!10chore: add model version control and finally can trigger the sparking flow...,!3fix/fixing-docker
Pipeline #66353 passed with stages
in 3 minutes and 54 seconds
......@@ -3,13 +3,13 @@ FROM apache/airflow:2.7.1-python3.11
# NOTE(review): this span is a scraped diff view — each changed line appears
# twice below (old line first, new line second) with no +/- markers; the
# merged Dockerfile keeps only the openjdk-17 variants.
USER root
# Install build tooling and a JDK as root (gcc/python3-dev for pip builds,
# a JDK for Spark/JVM interop).
RUN apt-get update && \
apt-get install -y gcc python3-dev openjdk-11-jdk && \
apt-get install -y gcc python3-dev openjdk-17-jdk && \
apt-get clean
# Old (java-11) vs new (java-17) JAVA_HOME. Path is arm64-specific —
# TODO confirm; on amd64 base images the JVM directory name differs.
ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-arm64
ENV JAVA_HOME /usr/lib/jvm/java-17-openjdk-arm64
USER airflow
COPY requirements.txt /app/requirements.txt
# The "\ No newline at end of file" line is a diff marker, not Dockerfile
# content; the pip install line is unchanged apart from the added newline.
RUN pip install --no-cache-dir -r /app/requirements.txt
\ No newline at end of file
RUN pip install --no-cache-dir -r /app/requirements.txt
"""Airflow DAG ``sparking_flow``: submits a PySpark word-count job daily.

Pipeline shape:  start -> python_job -> end
  * start / end   - lightweight PythonOperator log markers.
  * python_job    - SparkSubmitOperator running jobs/python/wordcount.py
                    through the ``spark_conn`` connection.
"""
import airflow
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.apache.spark.operators.spark_submit import SparkSubmitOperator
# Explicit import instead of relying on `import airflow` lazily exposing
# the `airflow.utils.dates` submodule.
from airflow.utils.dates import days_ago

dag = DAG(
    dag_id="sparking_flow",
    default_args={
        "owner": "admin",
        # days_ago(1) preserves the original dynamic start date; note the
        # Airflow docs recommend a fixed, timezone-aware datetime instead.
        "start_date": days_ago(1),
    },
    # `schedule` is the non-deprecated spelling of `schedule_interval`
    # from Airflow 2.4 onward (the image runs Airflow 2.7.1).
    schedule="@daily",
)

# Boundary marker: logs that the run has begun.
start = PythonOperator(
    task_id="start",
    python_callable=lambda: print("Jobs started"),
    dag=dag,
)

# Submits the word-count application via spark-submit using `spark_conn`.
python_job = SparkSubmitOperator(
    task_id="python_job",
    conn_id="spark_conn",
    application="jobs/python/wordcount.py",
    dag=dag,
)

# Boundary marker: logs successful completion.
end = PythonOperator(
    task_id="end",
    python_callable=lambda: print("Jobs completed successfully"),
    dag=dag,
)

start >> python_job >> end
\ No newline at end of file
version: '3'
# NOTE(review): this span is a scraped diff view — changed lines appear
# twice (old then new) with no +/- markers, and the "......@@" lines are
# diff hunk headers, not YAML; the merged file pins bitnami/spark:3.5.1.
# Shared anchor reused by the master and worker service definitions.
x-spark-common: &spark-common
image: bitnami/spark:latest
image: bitnami/spark:3.5.1
volumes:
- ./jobs:/opt/bitnami/spark/jobs
networks:
......@@ -26,6 +26,8 @@ services:
spark-master:
<<: *spark-common
command: bin/spark-class org.apache.spark.deploy.master.Master
# Added by this commit — presumably so the master advertises a stable
# hostname on the compose network; TODO confirm against the image docs.
environment:
- SPARK_MASTER_HOST=spark-master
ports:
- "9090:8080"
- "7077:7077"
......@@ -40,6 +42,7 @@ services:
SPARK_WORKER_CORES: 2
SPARK_WORKER_MEMORY: 1g
SPARK_MASTER_URL: spark://spark-master:7077
# Added alongside SPARK_MASTER_URL — NOTE(review): verify which of the
# two variables this bitnami image version actually honors.
SPARK_MASTER: spark://spark-master:7077
postgres:
image: postgres:14.0
......
"""Minimal PySpark word count over one hard-coded sentence.

Prints each distinct word with its occurrence count, then stops the
Spark session.
"""
from pyspark.sql import SparkSession

session = SparkSession.builder.appName("PythonWordCount").getOrCreate()

sentence = "Hello Spark Hello Python Hello Airflow Hello Docker and Hello Yusuf"

# Distribute the tokens, pair each word with 1, then sum the 1s per word.
pairs = session.sparkContext.parallelize(sentence.split(" ")).map(lambda w: (w, 1))
for word, count in pairs.reduceByKey(lambda left, right: left + right).collect():
    print(word, count)

session.stop()
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment