Flink On Yarn模式配置
- Flink On Yarn模式配置
- 引言
- 一、安裝JDK
- 二、安裝Hadoop
- 三、安裝Zookeeper
- 四、安裝Flink
Flink On Yarn模式配置
引言
Flink依靠Yarn來實作高可用。由于Yarn依賴于Hadoop,而Hadoop又依賴于Jdk,因此需要先依次安裝Jdk和Hadoop,再安裝Flink。
準備三臺機器:
1.1.1.1 node1
1.1.1.2 node2
1.1.1.3 node3
一、安裝JDK
1. 下載解壓
# Unpack the JDK archive into /usr/local.
tar -xvf jdk-8u271-linux-x64.tar.gz -C /usr/local
# The 8u271 archive unpacks to jdk1.8.0_271. Rename it with absolute paths so the
# command works from any directory and matches JAVA_HOME=/usr/local/jdk.
mv /usr/local/jdk1.8.0_271 /usr/local/jdk
2. 配置環境變數
export JAVA_HOME=/usr/local/jdk
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
3. 驗證
java -version
二、安裝Hadoop
1. 配置hosts,做主機名到ip地址映射,每臺機器都要更改
vi /etc/hosts
添加如下內容
1.1.1.1 node1
1.1.1.2 node2
1.1.1.3 node3
2. 配置ssh免密登錄
ssh-keygen
ssh-copy-id node2
ssh-copy-id node3
3. 解壓hadoop安裝包
# Unpack the Hadoop archive into /usr/local.
tar -xvf hadoop-2.10.1.tar.gz -C /usr/local
# Rename with absolute paths: the archive was extracted into /usr/local, so a
# relative mv only works when the current directory happens to be /usr/local.
mv /usr/local/hadoop-2.10.1 /usr/local/hadoop
4. 配置環境變數
# Hadoop installation root.
export HADOOP_HOME=/usr/local/hadoop
# sbin holds hadoop-daemon.sh, yarn-daemon.sh, start-dfs.sh and start-yarn.sh,
# which the later start-up steps call by bare name, so it must be on PATH too.
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
5. 配置HDFS集群
1. hadoop-env.sh
添加jdk路徑
export JAVA_HOME=/usr/local/jdk
2. core-site.xml
<configuration>
  <!-- Base directory from which other Hadoop temporary directories are derived. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/usr/local/hadoop/data/hdfs/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <!-- Read/write buffer size for sequence files, in bytes: 131072 bytes = 128 KB. -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <!-- Default file system: the logical HA nameservice "ns" defined in hdfs-site.xml. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://ns</value>
  </property>
  <!-- Allow the root user to act as a proxy from any host and for any group. -->
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
  <!-- Local directory in which each JournalNode keeps the shared edit log. -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/usr/local/hadoop/data/hdfs/journal</value>
  </property>
  <!-- ZooKeeper quorum used for automatic NameNode failover.
       The key must be ha.zookeeper.quorum; a bare "ha.zookeeper" is not read by Hadoop. -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>node1:2181,node2:2181,node3:2181</value>
  </property>
</configuration>
3. hdfs-site.xml
<configuration>
  <!-- Number of replicas kept for each block. -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <!-- Block size in bytes: 134217728 bytes = 128 MB. -->
  <property>
    <name>dfs.block.size</name>
    <value>134217728</value>
  </property>
  <!-- Local path where the NameNode stores its metadata. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///usr/local/hadoop/data/hdfs/namenode</value>
  </property>
  <!-- Local path where each DataNode stores its blocks. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///usr/local/hadoop/data/hdfs/datanode</value>
  </property>
  <!-- Local path where the NameNode stores its edit log files. -->
  <property>
    <name>dfs.namenode.edits.dir</name>
    <value>file:///usr/local/hadoop/data/hdfs/nn/edits</value>
  </property>
  <!-- Logical name of the HA nameservice. -->
  <property>
    <name>dfs.nameservices</name>
    <value>ns</value>
  </property>
  <!-- Two NameNodes in the nameservice; the one that is not active runs as standby. -->
  <property>
    <name>dfs.ha.namenodes.ns</name>
    <value>nn1,nn2</value>
  </property>
  <!-- RPC endpoints of the two NameNodes. -->
  <property>
    <name>dfs.namenode.rpc-address.ns.nn1</name>
    <value>node1:9000</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns.nn2</name>
    <value>node2:9000</value>
  </property>
  <!-- Web UI endpoints of the two NameNodes. -->
  <property>
    <name>dfs.namenode.http-address.ns.nn1</name>
    <value>node1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns.nn2</name>
    <value>node2:50070</value>
  </property>
  <!-- JournalNode quorum through which both NameNodes share the edit log.
       The key is dfs.namenode.shared.edits.dir ("shared", not "shard"). -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://node1:8485;node2:8485;node3:8485/ns</value>
  </property>
  <!-- Web address of the SecondaryNameNode.
       NOTE(review): an HA nameservice relies on the standby NameNode for checkpoints;
       confirm whether this entry is still needed. -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>node1:9001</value>
  </property>
  <!-- Enable WebHDFS so every WebHDFS operation can go through the NameNode address
       and port, without distinguishing NameNode and DataNode ports. -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <!-- Turn on automatic failover for nameservice "ns". -->
  <property>
    <name>dfs.ha.automatic-failover.enabled.ns</name>
    <value>true</value>
  </property>
  <!-- Whether permissions are checked when a user accesses files (disabled here). -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <!-- Fence the previously active NameNode over SSH during a failover. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <!-- Private key used by sshfence. -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>~/.ssh/id_rsa</value>
  </property>
  <!-- Lets clients resolve the logical nameservice "ns"; without it they fail with
       an unknown-host exception for "ns". -->
  <property>
    <name>dfs.client.failover.proxy.provider.ns</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
</configuration>
4. mapred-site.xml
<configuration>
  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- JobHistory server address. -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10200</value>
  </property>

  <!-- JobHistory server web UI address. -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
  </property>
</configuration>
6. 配置yarn集群
yarn-site.xml
<configuration>
  <!-- Auxiliary service run by each NodeManager; MapReduce programs cannot run
       unless this is set to mapreduce_shuffle. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Implementation class of the aux-service. The key embeds the service name
       exactly as declared above (mapreduce_shuffle, with an underscore). -->
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <!-- ResourceManager HA: two ResourceManagers, rm1 on node1 and rm2 on node2. -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>ns</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>node1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>node2</value>
  </property>
  <!-- Web UI endpoints of the two ResourceManagers. -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>node1:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>node2:8088</value>
  </property>
  <!-- Recover ResourceManager state after a restart or failover. -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <!-- ZooKeeper-backed state store used for HA. -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <!-- Enable log aggregation. -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- Keep aggregated logs for 7 days (604800 seconds). -->
  <property>
    <name>yarn.log-aggregation-retain-seconds</name>
    <value>604800</value>
  </property>
  <!-- ZooKeeper ensemble used when state is stored in ZooKeeper. -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>node1:2181,node2:2181,node3:2181</value>
  </property>
  <!-- Memory available to the NodeManager in MB; must be at least 1024,
       otherwise the NodeManager fails to start. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>4096</value>
  </property>
  <!-- Disable YARN virtual and physical memory checks. -->
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
  </property>
  <!-- Client-side provider used to fail over between the configured ResourceManagers. -->
  <property>
    <name>yarn.client.failover-proxy-provider</name>
    <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- Maximum number of ApplicationMaster attempts when it is restarted. -->
  <property>
    <name>yarn.resourcemanager.am.max-attempts</name>
    <value>10</value>
  </property>
</configuration>
7. 將/usr/local/hadoop檔案夾分發給node2和node3
scp -r hadoop root@node2:/usr/local/
scp -r hadoop root@node3:/usr/local/
8. 啟動集群
1) 在node1上(執行前需先按第三章安裝並啟動Zookeeper集群,否則formatZK無法連接Zookeeper)
hdfs zkfc -formatZK
2) 在三個節點分別啟動
hadoop-daemon.sh start journalnode
3) 在node1
hdfs namenode -format
hadoop-daemon.sh start namenode
4) 在node2上
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
5) 在node1和node2上
hadoop-daemon.sh start zkfc
6) 在三個節點上分別啟動
hadoop-daemon.sh start datanode
7) 在node1和node2上
yarn-daemon.sh start resourcemanager
8) 在三個節點上分別啟動
yarn-daemon.sh start nodemanager
9. 驗證
jps
日常啟動
在三個節點分別啟動
hadoop-daemon.sh start journalnode
在node1和node2啟動
hadoop-daemon.sh start zkfc
一鍵啟動
start-dfs.sh
start-yarn.sh
三、安裝Zookeeper
1. 下載解壓
# Unpack the ZooKeeper binary archive into /usr/local.
tar -xvf apache-zookeeper-3.5.9-bin.tar.gz -C /usr/local
# The "-bin" archive unpacks to a directory that keeps the "-bin" suffix.
mv /usr/local/apache-zookeeper-3.5.9-bin /usr/local/zookeeper
2. 修改用戶名和用戶組權限
chown -R root:root zookeeper/
3. 配置環境變數
export ZOOKEEPER_HOME=/usr/local/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin
4. 修改組態檔
cp zoo_sample.cfg zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/local/zookeeper/tmp/data/zookeeper
dataLogDir=/usr/local/zookeeper/tmp/log/zookeeper
# the port at which the clients will connect
clientPort=2181
autopurge.purgeInterval=1
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
# 注:server.1中的1為服務器id,需要與myid中的id一致
# 每個節點重復以上步驟
5. 設定服務器id
# Create the dataDir and dataLogDir configured in zoo.cfg; nothing earlier in this
# guide creates them, and writing myid fails if the directory is missing.
mkdir -p /usr/local/zookeeper/tmp/data/zookeeper /usr/local/zookeeper/tmp/log/zookeeper
# Write this node's server id (1 on node1). The redirect creates the file,
# so a separate touch is unnecessary.
echo 1 > /usr/local/zookeeper/tmp/data/zookeeper/myid
# On node2 write 2 and on node3 write 3, matching the server.N entries in zoo.cfg.
6. 啟動服務器
zkServer.sh start
7. 連接客戶端
zkCli.sh -server node1:2181
四、安裝Flink
1. 下載解壓
tar -xvf flink-1.13.2-bin-scala_2.11.tgz -C /usr/local/
mv /usr/local/flink-1.13.2 /usr/local/flink
2. 配置環境變數
export HADOOP_CLASSPATH=`/usr/local/hadoop/bin/hadoop classpath`
export FLINK_HOME=/usr/local/flink
3. 編輯組態檔
vi flink-conf.yaml
# JobManager memory has four parts: JVM Heap, Off-Heap Memory, JVM Metaspace and JVM Overhead.
# With a total process size of 2048m, JVM Overhead follows from the 0.1 fraction: 204.8m, i.e. 205m.
# JVM Metaspace defaults to 256m.
# Off-Heap Memory defaults to 128m.
# JVM Heap is therefore derived as 2048m - 205m - 256m - 128m = 1459m, about 1.42g.
# The GC algorithm takes a small fixed share of that as Non-Heap, about 0.05g,
# so the usable JVM Heap is about 1.42g - 0.05g = 1.37g.
jobmanager.rpc.address: node1
jobmanager.rpc.port: 6123
# Total JobManager process memory. The heap is derived from this value as shown above;
# how much heap is needed depends mainly on the number of running jobs, their structure
# and the user code. The legacy jobmanager.heap.size key is deliberately not set:
# a separate 1024m heap value would contradict the derivation above.
jobmanager.memory.process.size: 2048m
# Total TaskManager process memory.
taskmanager.memory.process.size: 4096m
# Task slots offered by each TaskManager; matching the number of CPU cores is recommended.
taskmanager.numberOfTaskSlots: 4
parallelism.default: 1
env.hadoop.conf.dir: /usr/local/hadoop/etc/hadoop
high-availability: zookeeper
# Maximum number of attempts when Flink is restarted.
yarn.application-attempts: 10
high-availability.storageDir: hdfs://ns/flink/recovery
high-availability.zookeeper.quorum: node1:2181,node2:2181,node3:2181
high-availability.zookeeper.path.root: /flink
# State backend and storage locations for checkpoints and savepoints.
state.backend: filesystem
state.checkpoints.dir: hdfs://ns/flink/checkpoints
state.savepoints.dir: hdfs://ns/flink/savepoints
# Failover strategy.
jobmanager.execution.failover-strategy: region
rest.port: 8081
# Whether jobs may be submitted through the web UI.
web.submit.enable: true
io.tmp.dirs: /usr/local/flink/data/tmp
env.log.dir: /usr/local/flink/data/logs
taskmanager.memory.network.fraction: 0.1
taskmanager.memory.network.min: 64mb
taskmanager.memory.network.max: 1gb
# NOTE(review): points at the same directory as env.hadoop.conf.dir above; this looks like
# the older key for that setting. Confirm it is still needed on Flink 1.13.
fs.hdfs.hadoopconf: /usr/local/hadoop/etc/hadoop
historyserver.web.address: 0.0.0.0
historyserver.web.port: 8082
historyserver.archive.fs.refresh-interval: 10000
4. 修改masters
node1:8081
node2:8081
5. 修改workers
node1
node2
node3
6. 修改zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/usr/local/flink/data/tmp/zookeeper/dataDir
dataLogDir=/usr/local/flink/data/tmp/zookeeper/dataLogDir
clientPort=2181
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
7. 添加jar包(放入/usr/local/flink/lib目錄)
flink-shaded-hadoop-2-uber-2.8.3-10.0.jar
8. 啟動flink yarn session模式
yarn-session.sh
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/323373.html
標籤:其他
上一篇:房屋中介管理系統
