HDFS Deployment, Part 3 (Fully Distributed, High Availability)

The roles are distributed across the nodes as follows:

node-1: NameNode (NN), ZKFC, JournalNode (JN), ZooKeeper (ZK), DataNode (DN)
node-2: NameNode (NN), ZKFC, JournalNode (JN), ZooKeeper (ZK), DataNode (DN)
node-3: JournalNode (JN), ZooKeeper (ZK), DataNode (DN)

Environment preparation

1. Edit /etc/hosts on every node so that node-1, node-2 and node-3 resolve correctly (see the sketch after this list).

2. Set up passwordless SSH between all nodes (also sketched below); the sshfence fencing method configured later depends on it.

3. Install the fuser command (used by sshfence):
yum install -y psmisc

4. Download the packages (into /usr/local/src/):
JDK 8 site: https://www.oracle.com/
Download: https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html

ZooKeeper 3.4.6 site: https://zookeeper.apache.org/
Download: https://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz

hadoop-3.1.3 site: http://hadoop.apache.org/
Download: https://downloads.apache.org/hadoop/common/hadoop-3.1.3/hadoop-3.1.3.tar.gz
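A minimal sketch of steps 1 and 2, run from node-1 (the IP addresses are placeholders, substitute your own; repeat the key generation and ssh-copy-id from node-2 and node-3 as well so every node can log in to every other node without a password):

# append hostname mappings on every node (example addresses)
cat >> /etc/hosts << 'EOF'
192.168.1.101 node-1
192.168.1.102 node-2
192.168.1.103 node-3
EOF

# generate a key pair and push it to all three nodes (including the local one)
ssh-keygen -t rsa -N '' -f /root/.ssh/id_rsa
for host in node-1 node-2 node-3; do ssh-copy-id root@$host; done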

Note: unless stated otherwise, all commands below are run on node-1.

Deploy the JDK (run on all nodes)

cd /usr/local/src/
tar zxf jdk-8u241-linux-x64.tar.gz
mv jdk1.8.0_241/ /usr/local/
echo 'export JAVA_HOME=/usr/local/jdk1.8.0_241' >> /etc/profile.d/ecm_env.sh
echo 'export PATH=$PATH:$JAVA_HOME/bin' >> /etc/profile.d/ecm_env.sh
source /etc/profile
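A quick sanity check on each node:

java -version
which java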

Deploy ZooKeeper

ssh node-1 "mkdir -p /data/ZooKeeper"
ssh node-2 "mkdir -p /data/ZooKeeper"
ssh node-3 "mkdir -p /data/ZooKeeper"

tar zxf zookeeper-3.4.6.tar.gz
mv zookeeper-3.4.6 /usr/local/

# Edit zoo.cfg under the conf directory as follows
[root@node-1 src]# cat /usr/local/zookeeper-3.4.6/conf/zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/ZooKeeper
clientPort=2181
server.1=node-1:2888:3888
server.2=node-2:2888:3888
server.3=node-3:2888:3888

# Create the myid file on each node (matching the server.N id in zoo.cfg)
ssh node-1 "echo 1 > /data/ZooKeeper/myid"
ssh node-2 "echo 2 > /data/ZooKeeper/myid"
ssh node-3 "echo 3 > /data/ZooKeeper/myid"

# Copy the ZooKeeper directory to the other two nodes
scp -r /usr/local/zookeeper-3.4.6/ node-2:/usr/local/
scp -r /usr/local/zookeeper-3.4.6/ node-3:/usr/local/

# Start ZooKeeper on all three nodes
ssh node-1 "/usr/local/zookeeper-3.4.6/bin/zkServer.sh start"
ssh node-2 "/usr/local/zookeeper-3.4.6/bin/zkServer.sh start"
ssh node-3 "/usr/local/zookeeper-3.4.6/bin/zkServer.sh start"

# Check that ZooKeeper started (QuorumPeerMain should appear in jps on every node)
[root@node-1 src]# jps
2529 QuorumPeerMain
2561 Jps
[root@node-2 src]# jps
2504 QuorumPeerMain
2540 Jps
[root@node-3 data]# jps
2518 Jps
2488 QuorumPeerMain
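jps only shows that the process is alive; zkServer.sh status also reports whether the ensemble has elected a leader (expect one leader and two followers):

ssh node-1 "/usr/local/zookeeper-3.4.6/bin/zkServer.sh status"
ssh node-2 "/usr/local/zookeeper-3.4.6/bin/zkServer.sh status"
ssh node-3 "/usr/local/zookeeper-3.4.6/bin/zkServer.sh status"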

Deploy HDFS

# Run the following two lines on all nodes
echo 'export HADOOP_HOME=/usr/local/hadoop-3.1.3' >> /etc/profile.d/hdfs.sh
source /etc/profile

tar zxf hadoop-3.1.3.tar.gz
mv hadoop-3.1.3 /usr/local/
cd /usr/local/hadoop-3.1.3/etc/hadoop/

# Append the following at the bottom of hadoop-env.sh
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
export HADOOP_SHELL_EXECNAME=root
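Depending on how sshd is configured, the sessions that start-dfs.sh opens on each node may not source /etc/profile, in which case Hadoop aborts complaining that JAVA_HOME is not set. Exporting it in hadoop-env.sh as well avoids that:

export JAVA_HOME=/usr/local/jdk1.8.0_241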

# core-site.xml configuration
[root@node-1 hadoop]# cat core-site.xml
(omitted)...
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://firstcluster</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop</value>
    </property>
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>node-1:2181,node-2:2181,node-3:2181</value>
    </property>
</configuration>

# hdfs-site.xml configuration
[root@node-1 hadoop]# cat hdfs-site.xml
(omitted)...
<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>firstcluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.firstcluster</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.firstcluster.nn1</name>
        <value>node-1:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.firstcluster.nn2</name>
        <value>node-2:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.firstcluster.nn1</name>
        <value>node-1:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.firstcluster.nn2</name>
        <value>node-2:9870</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://node-1:8485;node-2:8485;node-3:8485/ns</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.firstcluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/data/journalnode/data</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
</configuration>
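Optionally, once the configuration is in place you can check that Hadoop resolves the HA settings as expected with hdfs getconf:

/usr/local/hadoop-3.1.3/bin/hdfs getconf -confKey dfs.nameservices
/usr/local/hadoop-3.1.3/bin/hdfs getconf -confKey dfs.ha.namenodes.firstcluster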

# workers file (lists the DataNode hosts)
[root@node-1 hadoop]# cat workers
node-1
node-2
node-3

# Create the data directories
ssh node-1 "mkdir /data/hadoop && mkdir -p /data/journalnode/data"
ssh node-2 "mkdir /data/hadoop && mkdir -p /data/journalnode/data"
ssh node-3 "mkdir /data/hadoop && mkdir -p /data/journalnode/data"

# Copy hadoop-3.1.3 to the other two nodes
scp -r /usr/local/hadoop-3.1.3/ node-2:/usr/local/
scp -r /usr/local/hadoop-3.1.3/ node-3:/usr/local/

# Start the JournalNodes
ssh node-1 "/usr/local/hadoop-3.1.3/bin/hdfs --daemon start journalnode"
ssh node-2 "/usr/local/hadoop-3.1.3/bin/hdfs --daemon start journalnode"
ssh node-3 "/usr/local/hadoop-3.1.3/bin/hdfs --daemon start journalnode"

# Check the JournalNode processes
[root@node-1 src]# jps
2529 QuorumPeerMain
11795 Jps
11727 JournalNode

[root@node-2 src]# jps
11667 JournalNode
2504 QuorumPeerMain
11711 Jps

[root@node-3 data]# jps
11602 JournalNode
2488 QuorumPeerMain
11646 Jps
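Optionally, confirm each JournalNode is listening on its RPC port (8485 by default, matching the qjournal URI above):

ss -lntp | grep 8485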

# Format HDFS (run on node-1 only; the JournalNodes must already be running)
/usr/local/hadoop-3.1.3/bin/hdfs namenode -format

# Copy the NameNode metadata to node-2 (node-2 only, since node-1 and node-2 are the NameNodes)
[root@node-1 src]# scp -r /data/hadoop/ node-2:/data/
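Copying the directory by hand works; the alternative described in the Hadoop HA documentation is to bootstrap the standby directly on node-2, which pulls the freshly formatted image from the node-1 NameNode (so that NameNode would have to be started first, e.g. with hdfs --daemon start namenode):

ssh node-2 "/usr/local/hadoop-3.1.3/bin/hdfs namenode -bootstrapStandby"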

# Format ZKFC (creates the HA state znode in ZooKeeper)
[root@node-1 src]# /usr/local/hadoop-3.1.3/bin/hdfs zkfc -formatZK
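To confirm, list the HA parent znode with the ZooKeeper CLI; it should contain a child named after the nameservice (firstcluster):

/usr/local/zookeeper-3.4.6/bin/zkCli.sh -server node-1:2181 ls /hadoop-ha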

# Start the cluster
[root@node-1 hadoop]#  /usr/local/hadoop-3.1.3/sbin/start-dfs.sh
Starting namenodes on [node-1 node-2]
Last login: Mon Apr  6 21:14:34 CST 2020 on pts/0
Starting datanodes
Last login: Mon Apr  6 21:18:37 CST 2020 on pts/0
Starting journal nodes [node-1 node-2 node-3]
Last login: Mon Apr  6 21:18:40 CST 2020 on pts/0
Starting ZK Failover Controllers on NN hosts [node-1 node-2]
Last login: Mon Apr  6 21:19:08 CST 2020 on pts/0

Check the cluster status

Open the NameNode web UIs at http://node-1:9870 and http://node-2:9870. (Screenshots omitted: at this point node-1 shows as active and node-2 as standby.)
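The same information is available on the command line via the HA admin tool (nn1 and nn2 are the IDs defined in hdfs-site.xml), followed by a quick smoke test (the /test directory is just an example path):

/usr/local/hadoop-3.1.3/bin/hdfs haadmin -getServiceState nn1
/usr/local/hadoop-3.1.3/bin/hdfs haadmin -getServiceState nn2
/usr/local/hadoop-3.1.3/bin/hdfs dfs -mkdir /test
/usr/local/hadoop-3.1.3/bin/hdfs dfs -ls /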

Test the high-availability failover

node-1 is currently the active NameNode. Kill the NameNode process on node-1 (screenshot omitted; one way to do it is sketched below).
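A minimal way to do it, assuming the pid reported by jps:

# on node-1: find the NameNode pid and kill it
jps | grep ' NameNode'
kill -9 $(jps | grep ' NameNode' | awk '{print $1}')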

Checking again shows that node-2 has taken over as active (screenshot omitted).

Then start the NameNode on node-1 again:
[root@node-1 logs]# /usr/local/hadoop-3.1.3/bin/hdfs --daemon start namenode
(screenshot omitted)
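Since node-2 is still active, the restarted NameNode on node-1 should come back in the standby state, which can be confirmed with:

/usr/local/hadoop-3.1.3/bin/hdfs haadmin -getServiceState nn1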
