Rocky Linux 9系统部署kubernetes集群
鉴于k8s移除默认的dockershim,centos7生命周期结束,docker镜像无法拉取等,上一篇教程已不适用,更新此文章。
多master,高可用架构
- 系统:
Rocky Linux 9
- 版本:
kubernetes 1.34
- 运行时:
cri-docker
- 网络插件:
calico
基础配置
rocklinux 网络配置
vim /etc/NetworkManager/system-connections/xx
# 在 [ipv4] 段内配置。keyfile 格式的键名为 address1(而不是 address),dns 以分号结尾
method=manual
address1=172.10.10.175/24,172.10.10.1
dns=8.8.8.8;
systemctl restart NetworkManager && ip addr
hosts配置
cat >> /etc/hosts << EOF
172.10.10.171 m1
172.10.10.172 m2
172.10.10.173 m3
172.10.10.174 n4
172.10.10.175 n5
EOF
修改主机名
hostnamectl set-hostname xx
更换yum源
# Switch the Rocky repos to the Aliyun mirror.
# The trailing backslashes are required: without them each option line
# runs as a separate (broken) command instead of one sed invocation.
sed -e 's|^mirrorlist=|#mirrorlist=|g' \
    -e 's|^#baseurl=http://dl.rockylinux.org/$contentdir|baseurl=https://mirrors.aliyun.com/rockylinux|g' \
    -i.bak \
    /etc/yum.repos.d/[Rr]ocky*.repo
# Refresh the metadata cache
dnf makecache
防火墙
systemctl disable --now firewalld.service
systemctl status firewalld.service
SELinux
# Permanently disable SELinux. The back-reference must be \1 —
# as originally written ("1disabled", backslash lost) the config line
# would be rewritten as the invalid "SELINUX=1disabled".
sed -ri 's#(SELINUX=)enforcing#\1disabled#g' /etc/selinux/config
# Temporarily disable for the current boot
setenforce 0
# Check the current status
getenforce
免密登录
pass
时间同步
# 安装时间同步
dnf install chrony -y
# 编辑配置文件,加入以下内容
vim /etc/chrony.conf
pool ntp1.aliyun.com iburst
pool ntp2.aliyun.com iburst
pool cn.pool.ntp.org iburst
# 配置开机自启
systemctl enable --now chronyd
# 测试
chronyc sources
禁用swap
# 临时禁用
swapoff -a
# 永久禁用
sed -i 's/.*swap.*/#&/' /etc/fstab
修改内核参数
# Write the k8s kernel parameters. Use ">" (truncate) instead of ">>"
# so re-running this step does not append duplicate entries.
cat > /etc/sysctl.d/k8s.conf << EOF
#内核参数调整
vm.swappiness=0
#配置iptables参数,使得流经网桥的流量也经过iptables/netfilter防火墙
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
# 加载网桥过滤模块
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
# 重新加载
sysctl --system
# 检测
lsmod | grep br_netfilter
# 返回如下内容表示成功
# br_netfilter 32768 0
# bridge 303104 1 br_netfilter
配置ipvs
# 安装ipset和ipvsadm
dnf install ipset ipvsadm -y
# 添加需要加载的模块写入脚本文件
cat <<EOF | sudo tee /etc/modules-load.d/ipvs.conf
overlay
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
modprobe overlay
modprobe ip_vs && modprobe ip_vs_rr && modprobe ip_vs_wrr && modprobe ip_vs_sh && modprobe nf_conntrack
#查看模块是否加载成功
# nf_conntrack_ipv4 no longer exists on EL9 kernels (merged into
# nf_conntrack since 4.19) — check for the module actually loaded above.
lsmod | grep -e ip_vs -e nf_conntrack
安装配置docker
# 添加阿里云docker仓库
dnf config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# 切换源
sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# 更新源数据
dnf makecache
# 安装最新版
dnf install docker-ce -y
cat > /etc/docker/daemon.json <<EOF
{
"registry-mirrors": [
"https://dockerhub.azk8s.cn",
"https://docker.mirrors.ustc.edu.cn",
"http://hub-mirror.c.163.com",
"https://mirror.ccs.tencentyun.com",
"https://nfvzt07v.mirror.aliyuncs.com",
"https://ba301968e4af4e539e8a64abc60c9ff5.mirror.swr.myhuaweicloud.com"
],
"max-concurrent-downloads": 10,
"log-driver": "json-file",
"log-level": "warn",
"log-opts": {
"max-size": "10m",
"max-file": "3"
},
"data-root": "/var/lib/docker",
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
systemctl daemon-reload && systemctl restart docker && systemctl enable docker
运行时cri-docker
wget https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.20/cri-dockerd-0.3.20.amd64.tgz
tar -zxvf cri-dockerd-0.3.20.amd64.tgz
# 拷贝并设置执行权限
cp cri-dockerd/cri-dockerd /usr/bin/ && chmod +x /usr/bin/cri-dockerd
# 系统服务cri-docker.service
cat <<"EOF" > /usr/lib/systemd/system/cri-docker.service
[Unit]
Description=CRI Interface for Docker Application Container Engine
Documentation=https://docs.mirantis.com
After=network-online.target firewalld.service docker.service
Wants=network-online.target
Requires=cri-docker.socket
[Service]
Type=notify
ExecStart=/usr/bin/cri-dockerd --network-plugin=cni --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.10
ExecReload=/bin/kill -s HUP $MAINPID
TimeoutSec=0
RestartSec=2
Restart=always
StartLimitBurst=3
StartLimitInterval=60s
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
Delegate=yes
KillMode=process
[Install]
WantedBy=multi-user.target
EOF
### 添加cri-docker套接字
cat <<"EOF" > /usr/lib/systemd/system/cri-docker.socket
[Unit]
Description=CRI Docker Socket for the API
PartOf=cri-docker.service
[Socket]
ListenStream=%t/cri-dockerd.sock
SocketMode=0660
SocketUser=root
SocketGroup=docker
[Install]
WantedBy=sockets.target
EOF
systemctl daemon-reload && systemctl enable cri-docker
systemctl start cri-docker
# 查看启动状态
systemctl is-active cri-docker # 输出结果为active表示启动正常
# 如果启动失败,可以通过以下命令查看日志
# journalctl -u cri-docker
rocklinux 在所有master节点部署nginx及keepalived
#安装配置nginx
#nginx yum官方安装源配置
sudo tee /etc/yum.repos.d/nginx.repo << 'EOF'
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=1
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
EOF
#安装
yum remove nginx -y
yum install nginx -y
# 编辑nginx配置文件
echo > /etc/nginx/nginx.conf
cat <<"EOF" > /etc/nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;
include /usr/share/nginx/modules/*.conf;
events {
worker_connections 1024;
}
# 四层负载均衡,为两台Master apiserver组件提供负载均衡
stream {
log_format main '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
access_log /var/log/nginx/k8s-access.log main;
upstream k8s-apiserver {
server 172.10.10.171:6443; # master1 apiserver
server 172.10.10.172:6443; # master2 apiserver
server 172.10.10.173:6443; # master3 apiserver
}
server {
listen 64430; # 由于nginx与master节点复用,这个监听端口不能是6443,否则会冲突
proxy_pass k8s-apiserver;
}
}
http {
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
include /etc/nginx/mime.types;
default_type application/octet-stream;
server {
listen 8000 default_server;
server_name _;
location / {
}
}
}
EOF
# 启动nginx,需要按顺序启动
# systemctl start nginx && systemctl enable nginx
安装配置keepalived
# 安装
yum install -y keepalived
# 编辑keepalive配置文件
#注意每台虚拟机keepalived.conf的带#注释部分需设置不一样的
mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.bak
cat <<"EOF" > /etc/keepalived/keepalived.conf
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 127.0.0.1
smtp_connect_timeout 30
router_id NGINX_MASTER
}
vrrp_script check_nginx {
script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
state BACKUP
interface ens33 # 修改为实际网卡名,根据实际网卡名字设置
virtual_router_id 1 # VRRP 路由 ID实例,三台虚拟机设置成一样
priority 100 # 优先级,三台虚拟机设置成不一样
advert_int 1 # 指定VRRP 心跳包通告间隔时间,默认1秒
authentication {
auth_type PASS
auth_pass 1111
}
# 虚拟IP
virtual_ipaddress {
172.10.10.200/24
}
track_script {
check_nginx
}
}
EOF
#创建keepalived用的判断故障脚本
# Health-check script used by keepalived's vrrp_script: if nginx is not
# running, stop keepalived so the VIP fails over to another node.
# pgrep replaces the fragile "ps | grep | grep | egrep -v" pipeline
# (egrep is deprecated).
cat <<"EOF" > /etc/keepalived/check_nginx.sh
#!/bin/bash
# Stop keepalived (releasing the VIP) when no nginx process is running.
if ! pgrep -x nginx >/dev/null 2>&1; then
    systemctl stop keepalived
fi
EOF
chmod +x /etc/keepalived/check_nginx.sh
#注:keepalived根据脚本返回状态码(0为工作正常,非0不正常)判断是否故障转移。
#启动keepalived,需要按顺序启动
# systemctl start keepalived && systemctl enable keepalived
#测试
安装k8s集群
安装
# 配置阿里源
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.34/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.34/rpm/repodata/repomd.xml.key
EOF
# 可安装版本查看
dnf list kubelet --showduplicates | sort -r
# 安装
dnf install -y kubelet kubeadm kubectl
#kubelet 运行在集群所有节点上,用于启动Pod和容器等对象的工具
#kubeadm 用于初始化集群,启动集群的命令工具
#kubectl 用于和集群通信的命令行,通过kubectl可以部署和管理应用,查看各种资源,创建、删除和更新各种组件
# 启动kubelet并设置开机自启
systemctl enable kubelet && systemctl start kubelet
# kubectl命令补全
echo "source <(kubectl completion bash)" >> ~/.bash_profile
# Use an absolute path: "source .bash_profile" only works when the
# current directory happens to be $HOME.
source ~/.bash_profile
# 查看拉取的镜像
kubeadm config images list --image-repository=registry.aliyuncs.com/google_containers
# 预先拉取镜像 --cri-socket 指定拉取时使用的容器运行时
kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers --cri-socket unix:///var/run/cri-dockerd.sock
初始化
# 多master方式
# master节点执行
##### 启动nginx keeplived
#启动顺序,先在master1启动,确认vip已在master1生效后,再启动master2,master3的
systemctl start nginx && systemctl enable nginx
systemctl start keepalived && systemctl enable keepalived
# 在第一台master初始化
# Initialise the first control-plane node. The trailing backslashes are
# required — without them every "--flag" line executes as a separate
# command and kubeadm init runs with no options at all.
# NOTE: --kubernetes-version must match the kubelet/kubeadm version
# installed from the repo above (the repo is pinned to v1.34 while
# v1.33.4 is specified here — align the two before running).
kubeadm init --kubernetes-version v1.33.4 \
  --apiserver-advertise-address=172.10.10.171 \
  --image-repository registry.aliyuncs.com/google_containers \
  --service-cidr=10.96.0.0/12 \
  --pod-network-cidr=10.244.0.0/16 \
  --upload-certs \
  --control-plane-endpoint 172.10.10.200:64430 \
  --cri-socket=unix:///var/run/cri-dockerd.sock
# 172.10.10.200是vip
#记录上述面命令安装成功后的输出,后面需要这个命令将node节点和其他master节点加入集群中。
#注意master节点加入和node节点加入使用的参数是不一样的,如下图,上面一条是master用的、下面一条是node用的
#参数说明
#--apiserver-advertise-address指定API Server地址
#--apiserver-bind-port指定绑定的API Server端口,默认值为6443
#--ignore-preflight-errors忽视检查项错误列表,例如IsPrivilegedUser,Swap,如填写为 all 则将忽视所有的检查项错误.已关闭swap时,可不用该参数
#--kubernetes-version指定Kubernetes版本
#--pod-network-cidr指定pod网络IP地址段
#--service-cidr指定service的IP地址段
#--service-dns-domain指定Service的域名,默认为cluster.local
#--token指定token--token-ttl指定token有效时间,如果设置为0,则永不过期
#--image-repository指定镜像仓库地址,默认为k8s.gcr.io
#--upload-certs
#- 在高可用(HA)集群中,当你使用 kubeadm join
命令添加新的控制平面节点时(即 kubeadm join ... --control-plane
)
#如果 --upload-certs
已启用,新节点会自动从 kubeadm-certs
Secret 中下载并解密这些证书,而无需手动复制。
#--upload-certs
**必须与 --control-plane
一起用于 kubeadm join
**,否则新节点不会尝试下载证书。
设置配置文件
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# 复制到其他master节点,node节点不需要
# scp -r $HOME/.kube s2:$HOME/
# scp -r $HOME/.kube s3:$HOME/
# 查看主节点状态
kubectl get componentstatuses
#输出如下信息
#NAME STATUS MESSAGE ERROR
#scheduler Healthy ok
#controller-manager Healthy ok
#etcd-0 Healthy ok
其余主节点加入
# Join an additional control-plane node. Trailing backslashes are
# required — as originally written each flag line would run as a
# separate command.
kubeadm join 172.10.10.200:64430 --token fslukr.ymhh7g5zm94ywdbx \
  --discovery-token-ca-cert-hash sha256:1a2f74d821f1cdfc243ba3e4f7e3f826b135867926fb27fbcf6e236dd062565f \
  --control-plane \
  --certificate-key 6b3c22c6fe9debfb2b10bf3f85551f39de7236528ca27647e6093f522325b1c3 \
  --cri-socket=unix:///var/run/cri-dockerd.sock
node点加入集群
# Run on every worker node. Trailing backslashes are required so the
# flags belong to the single kubeadm join command.
kubeadm join 172.10.10.200:64430 --token fslukr.ymhh7g5zm94ywdbx \
  --discovery-token-ca-cert-hash sha256:1a2f74d821f1cdfc243ba3e4f7e3f826b135867926fb27fbcf6e236dd062565f \
  --cri-socket unix:///var/run/cri-dockerd.sock
查看状态
kubectl get nodes
# 当前集群是NotReady状态,还需要配置网络后才可用
NAME STATUS ROLES AGE VERSION
m1 NotReady control-plane 40m v1.33.4
m2 NotReady control-plane 36m v1.33.4
m3 NotReady control-plane 35m v1.33.4
n4 NotReady <none> 9m54s v1.33.4
n5 NotReady <none> 9m46s v1.33.4
kubectl get pods --all-namespaces
#coredns 未运行
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system coredns-757cc6c8f8-26tzd 0/1 Pending 0 31m
kube-system coredns-757cc6c8f8-bn27f 0/1 Pending 0 31m
kube-system etcd-m1 1/1 Running 0 31m
kube-system etcd-m2 1/1 Running 0 26m
kube-system etcd-m3 1/1 Running 0 26m
kube-system kube-apiserver-m1 1/1 Running 0 31m
kube-system kube-apiserver-m2 1/1 Running 0 26m
kube-system kube-apiserver-m3 1/1 Running 0 26m
kube-system kube-controller-manager-m1 1/1 Running 0 31m
kube-system kube-controller-manager-m2 1/1 Running 0 26m
kube-system kube-controller-manager-m3 1/1 Running 0 26m
kube-system kube-proxy-7jklp 1/1 Running 0 31m
kube-system kube-proxy-gxjsl 1/1 Running 0 32s
kube-system kube-proxy-jm658 1/1 Running 0 26m
kube-system kube-proxy-nq9m9 1/1 Running 0 40s
kube-system kube-proxy-rsdj7 1/1 Running 0 26m
kube-system kube-scheduler-m1 1/1 Running 0 31m
kube-system kube-scheduler-m2 1/1 Running 0 26m
kube-system kube-scheduler-m3 1/1 Running 0
calico网络
# master 执行
wget https://docs.projectcalico.org/manifests/calico.yaml --no-check-certificate
#修改calico.yaml (忽略此步骤) ...
#当使用kubeadm时,PodIP的范围应该与kubeadm init的清单文件中的"podSubnet"字段或者"--pod-network-cidr"选项填写的值一样
#可先拉镜像,或者修改为阿里镜像
#查看calico.yaml用了哪些容器,并在所有节点上pull容器
cat calico.yaml |grep image:
docker pull docker.io/calico/cni:v3.25.0
docker pull docker.io/calico/kube-controllers:v3.25.0
docker pull docker.io/calico/node:v3.25.0
#在该主节点执行安装
kubectl apply -f calico.yaml
kubectl get nodes
# 当前集群是ready状态
NAME STATUS ROLES AGE VERSION
m1 Ready control-plane 44m v1.33.4
m2 Ready control-plane 39m v1.33.4
m3 Ready control-plane 39m v1.33.4
n4 Ready <none> 13m v1.33.4
n5 Ready <none> 13m v1.33.4
#查看pod状态
kubectl get pods --all-namespaces
#状态应当全部是running,#coredns 已运行
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-7498b9bb4c-wvgmv 1/1 Running 0 3m3s
kube-system calico-node-7lmbk 1/1 Running 0 3m3s
kube-system calico-node-cgtcr 1/1 Running 0 3m3s
kube-system calico-node-lvbs5 1/1 Running 0 3m3s
kube-system calico-node-m687j 1/1 Running 0 3m3s
kube-system calico-node-m6956 1/1 Running 0 3m3s
kube-system coredns-757cc6c8f8-26tzd 1/1 Running 0 43m
kube-system coredns-757cc6c8f8-bn27f 1/1 Running 0 43m
kube-system etcd-m1 1/1 Running 0 43m
kube-system etcd-m2 1/1 Running 0 38m
kube-system etcd-m3 1/1 Running 0 38m
kube-system kube-apiserver-m1 1/1 Running 0 43m
kube-system kube-apiserver-m2 1/1 Running 0 38m
kube-system kube-apiserver-m3 1/1 Running 0 38m
kube-system kube-controller-manager-m1 1/1 Running 0 43m
kube-system kube-controller-manager-m2 1/1 Running 0 38m
kube-system kube-controller-manager-m3 1/1 Running 0 38m
kube-system kube-proxy-7jklp 1/1 Running 0 43m
kube-system kube-proxy-gxjsl 1/1 Running 0 12m
kube-system kube-proxy-jm658 1/1 Running 0 38m
kube-system kube-proxy-nq9m9 1/1 Running 0 12m
kube-system kube-proxy-rsdj7 1/1 Running 0 38m
kube-system kube-scheduler-m1 1/1 Running 0 43m
kube-system kube-scheduler-m2 1/1 Running 0 38m
kube-system kube-scheduler-m3 1/1 Running 0 38m
##### kube-proxy 切换使用ipvs
pass
部署完成!