HDFS 部署之四 (yarn和mapreduce)

当前环境是《HDFS 部署之三(完全分布式 高可用)》 的环境

以下操作全部在 node-1 节点上执行

# Switch into the Hadoop install directory (HADOOP_HOME is set in earlier parts of this series).
cd $HADOOP_HOME

# Append the following to the bottom of hadoop-env.sh.
# YARN — Hadoop 3.x requires these *_USER variables when the daemons are started as root.
export YARN_RESOURCEMANAGER_USER=root
# NOTE(review): HDFS_DATANODE_SECURE_USER looks out of place among the YARN user
# variables — confirm it was intended here and not in the HDFS (part three) setup.
export HDFS_DATANODE_SECURE_USER=root
export YARN_NODEMANAGER_USER=root

# 编辑 mapred-site.xml 内容如下（下列配置项均为 mapreduce.* 前缀，属于 mapred-site.xml）
<configuration>
    <!-- All properties below are mapreduce.* keys, i.e. mapred-site.xml content. -->
    <!-- Run MapReduce jobs on the YARN framework (instead of local/classic mode). -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- RPC address of the MapReduce JobHistory server. -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>node-1:10020</value>
    </property>
    <!-- Web UI address of the JobHistory server. -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>node-1:19888</value>
    </property>
    <!-- Classpath handed to MapReduce application containers.
         NOTE(review): paths are hard-coded to /usr/local/hadoop-3.1.3 —
         keep them in sync with the actual install directory on every node. -->
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /usr/local/hadoop-3.1.3/etc/hadoop,
            /usr/local/hadoop-3.1.3/share/hadoop/common/*,
            /usr/local/hadoop-3.1.3/share/hadoop/common/lib/*,
            /usr/local/hadoop-3.1.3/share/hadoop/hdfs/*,
            /usr/local/hadoop-3.1.3/share/hadoop/hdfs/lib/*,
            /usr/local/hadoop-3.1.3/share/hadoop/mapreduce/*,
            /usr/local/hadoop-3.1.3/share/hadoop/mapreduce/lib/*,
            /usr/local/hadoop-3.1.3/share/hadoop/yarn/*,
            /usr/local/hadoop-3.1.3/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>

# 编辑 yarn-site.xml 内容如下（下列配置项均为 yarn.* 前缀，属于 yarn-site.xml）
<configuration>
    <!-- All properties below are yarn.* keys, i.e. yarn-site.xml content. -->
    <!-- Enable ResourceManager high availability. -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <!-- Logical cluster id shared by both ResourceManagers. -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>firstcluster</value>
    </property>
    <!-- Logical ids of the two ResourceManagers. -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <!-- Hostname for each ResourceManager. -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>node-1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>node-2</value>
    </property>
    <!-- Web UI address for each ResourceManager.
         NOTE(review): verify 192.168.20.183 is node-1 and 192.168.20.171 is
         node-2 — the IP-to-host mapping is not shown in this document. -->
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>192.168.20.183:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>192.168.20.171:8088</value>
    </property>
    <!-- ZooKeeper quorum used for RM leader election and state storage. -->
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>node-1:2181,node-2:2181,node-3:2181</value>
    </property>
    <!-- Auxiliary shuffle service required by MapReduce on the NodeManagers. -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Aggregate container logs to shared storage after application finish. -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <!-- Keep aggregated logs for 86400 seconds (24 hours). -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>86400</value>
    </property>
    <!-- Let the ResourceManager recover running applications after a restart. -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <!-- Persist ResourceManager state in the ZooKeeper cluster. -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <!-- Disable virtual-memory limit enforcement for containers. -->
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
    <!-- Allowed virtual-to-physical memory ratio per container. -->
    <property>
        <name>yarn.nodemanager.vmem-pmem-ratio</name>
        <value>5</value>
    </property>
</configuration>

# Distribute the updated configuration files to the other two nodes.
# NOTE(review): the relative paths imply the current directory is
# .../etc/hadoop — confirm, since L7 only did `cd $HADOOP_HOME`.
for node in node-2 node-3; do
    scp mapred-site.xml yarn-site.xml hadoop-env.sh "${node}":/usr/local/hadoop-3.1.3/etc/hadoop/
done

# Bring up the MapReduce JobHistory server.
/usr/local/hadoop-3.1.3/bin/mapred --daemon start historyserver

# Bring up YARN (ResourceManager + NodeManagers).
/usr/local/hadoop-3.1.3/sbin/start-yarn.sh

浏览器访问 查看状态

访问(active) http://192.168.20.171:8088/cluster/cluster
file

访问(standby) http://192.168.20.183:8088/cluster/cluster
file

Comments Closed.