1. Install Hadoop
2. Install Spark
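A minimal sketch of fetching and unpacking the Spark release used below (the version and install path come from the config in 2.1; the archive mirror URL is an assumption):

wget https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz
tar -zxvf spark-3.5.0-bin-hadoop3.tgz -C /usr/local/bigdata/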
2.1 Edit spark-env.sh
# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.
export JAVA_HOME=/usr/local/jdk1.8.0_391
export SCALA_HOME=/usr/local/bigdata/scala-2.13.12
export SPARK_HOME=/usr/local/bigdata/spark-3.5.0-bin-hadoop3
export SPARK_CONF=${SPARK_HOME}/conf
export HADOOP_HOME=/usr/local/bigdata/hadoop-3.3.6
export YARN_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
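Neither spark-env.sh nor spark-defaults.conf exists out of the box; as the template comment above says, they are created by copying the shipped templates, roughly as follows (paths assume the layout above):

cd ${SPARK_HOME}/conf
cp spark-env.sh.template spark-env.sh
cp spark-defaults.conf.template spark-defaults.conf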
2.2 Edit spark-defaults.conf
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.eventLog.enabled true
spark.eventLog.dir hdfs://node4:9001/spark-history
spark.eventLog.compress true
spark.yarn.historyServer.address node4:18080
spark.history.ui.port 18080
spark.history.fs.logDirectory hdfs://node4:9001/spark-history
spark.history.retainedApplications 10
spark.history.fs.update.interval 5s
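Because spark.eventLog.dir and spark.history.fs.logDirectory point at HDFS, that directory must exist before the first job runs, or event logging fails at application startup. Create it on the running cluster (assuming HDFS answers at node4:9001 as configured):

hdfs dfs -mkdir -p hdfs://node4:9001/spark-history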
Start Spark
While an application is running, its driver web UI is available at http://192.168.42.142:4040/.
Run a Spark job (local mode, since no --master is specified)
spark-submit --class org.apache.spark.examples.SparkPi --driver-memory 1g --num-executors 1 --executor-memory 512m --executor-cores 1 ${SPARK_HOME}/examples/jars/spark-examples*.jar 100
23/11/10 17:58:40 INFO DAGScheduler: Job 0 finished: reduce at SparkPi.scala:38, took 2.589683 s
Pi is roughly 3.1408783140878316
23/11/10 17:58:40 INFO SparkContext: SparkContext is stopping with exitCode 0.
23/11/10 17:58:40 INFO SparkUI: Stopped Spark web UI at http://node4:4041
23/11/10 17:58:40 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
23/11/10 17:58:40 INFO MemoryStore: MemoryStore cleared
23/11/10 17:58:40 INFO BlockManager: BlockManager stopped
23/11/10 17:58:40 INFO BlockManagerMaster: BlockManagerMaster stopped
23/11/10 17:58:40 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
23/11/10 17:58:40 INFO SparkContext: Successfully stopped SparkContext
23/11/10 17:58:40 INFO ShutdownHookManager: Shutdown hook called
23/11/10 17:58:40 INFO ShutdownHookManager: Deleting directory /tmp/spark-90d31a27-97c6-4a43-b61e-2b3e20b80470
23/11/10 17:58:40 INFO ShutdownHookManager: Deleting directory /tmp/spark-08b33441-38f5-426c-bab4-65aba7d1f365
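If event logging is working, this finished run should now appear in the history server UI and as a log file under the configured HDFS directory; a quick check:

hdfs dfs -ls hdfs://node4:9001/spark-history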
Run the job on Hadoop (YARN)
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster --driver-memory 1g --num-executors 1 --executor-memory 512m --executor-cores 1 ${SPARK_HOME}/examples/jars/spark-examples*.jar 1000
23/11/10 18:00:31 INFO Client: Application report for application_1699607542027_0007 (state: FINISHED)
23/11/10 18:00:31 INFO Client:
client token: N/A
diagnostics: N/A
ApplicationMaster host: node4
ApplicationMaster RPC port: 38598
queue: default
start time: 1699610404757
final status: SUCCEEDED
tracking URL: http://node4:8088/proxy/application_1699607542027_0007/
user: root
23/11/10 18:00:31 INFO ShutdownHookManager: Shutdown hook called
23/11/10 18:00:31 INFO ShutdownHookManager: Deleting directory /tmp/spark-1eeae87e-21d7-4602-bde5-eeeac22cf233
23/11/10 18:00:31 INFO ShutdownHookManager: Deleting directory /tmp/spark-6ab0762e-fe37-4070-8992-8a8f03e2a107
The job completed successfully.
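Note that in cluster deploy mode the driver runs inside the YARN ApplicationMaster, so the "Pi is roughly ..." line does not appear on the client console; it lands in the container logs. One way to retrieve it (assuming YARN log aggregation is enabled):

yarn logs -applicationId application_1699607542027_0007 | grep "Pi is roughly"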