diff --git a/airflow/airflow.db b/airflow/airflow.db
index 67be4f24f56cd61a813f0314f1c844c8e82f8406..0537636887a40e69e4c7f511c97e7bc5ecfbd190 100644
Binary files a/airflow/airflow.db and b/airflow/airflow.db differ
diff --git a/docker-compose.yml b/docker-compose.yml
index 71a008bf5d8c86497764b0fff7dacc4ebe8d02e0..ecef378dfdef6df73edec708c8f29bcf83c058e6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,44 +10,46 @@ services:
     ports:
       - "8080:8080"
     networks:
-      - airflow-spark-network
+      - spark-cluster
     depends_on:
       - spark-master
     command: bash -c "rm -f /opt/airflow/airflow-webserver.pid && airflow db init && (airflow scheduler & airflow webserver)"
   spark-master:
-    image: bde2020/spark-master:3.3.0-hadoop3.3
+    image: bitnami/spark:latest
     container_name: spark-master
+    environment:
+      - SPARK_MODE=master
+      - SPARK_MASTER_HOST=spark-master
+      - SPARK_MASTER_PORT=7077
     ports:
-      - 8081:8080
-      - 7077:7077
+      - "8081:8080" # Web UI for Spark Master
+      - "7077:7077" # Spark Master Port for worker connections
     networks:
-      - airflow-spark-network
+      - spark-cluster
     volumes:
       - ./workspace:/opt/workspace
+
   spark-worker-1:
-    image: bde2020/spark-worker:3.3.0-hadoop3.3
+    image: bitnami/spark:latest
     container_name: spark-worker-1
     environment:
-      - SPARK_WORKER_CORES=1
-      - SPARK_WORKER_MEMORY=512m
-    ports:
-      - 8082:8081
+      - SPARK_MODE=worker
+      - SPARK_MASTER=spark://spark-master:7077
     networks:
-      - airflow-spark-network
+      - spark-cluster
     volumes:
       - ./workspace:/opt/workspace
     depends_on:
       - spark-master
+
   spark-worker-2:
-    image: bde2020/spark-worker:3.3.0-hadoop3.3
+    image: bitnami/spark:latest
     container_name: spark-worker-2
     environment:
-      - SPARK_WORKER_CORES=1
-      - SPARK_WORKER_MEMORY=512m
-    ports:
-      - 8083:8081
+      - SPARK_MODE=worker
+      - SPARK_MASTER=spark://spark-master:7077
     networks:
-      - airflow-spark-network
+      - spark-cluster
     volumes:
       - ./workspace:/opt/workspace
     depends_on:
@@ -57,5 +59,5 @@ volumes:
   shared-workspace:
 
 networks:
-  airflow-spark-network:
+  spark-cluster:
     driver: bridge