操作系统:CentOS Stream 10

项目架构

服务器IP 主机名 角色 配置
192.168.117.131 k8s-master Master节点 2核4G+
192.168.117.132 k8s-node-1 Node节点 2核4G+
192.168.117.133 k8s-node-2 Node节点 2核4G+
192.168.117.134 nfs NFS存储服务器 1核2G+

第一部分:K8s集群基础部署

1. 系统初始化(所有节点执行)

1.1 网络与主机名配置

# Configure a static IP. The values shown are for the master node; change the
# address on every other node.
cd /etc/NetworkManager/system-connections/
vim ens160.nmconnection

# Content to put into the connection profile opened above (not shell commands).
# address1 is "<ip>/<prefix>,<second address>" - presumably the second
# address is the gateway; confirm against your network.
[ipv4]
method=manual
address1=192.168.117.131/24,192.168.117.2
dns=114.114.114.114

# Reload the connection profiles, then bring the interface up again.
nmcli connection reload && nmcli device up ens160

# Set the hostname: run only the line that matches the current node.
hostnamectl set-hostname k8s-master      # node .131
hostnamectl set-hostname k8s-node-1      # node .132
hostnamectl set-hostname k8s-node-2      # node .133
hostnamectl set-hostname nfs      # node .134

# On every node (NFS server included): append name resolution entries for
# all four machines. This appends, so running it twice duplicates the lines.
cat >> /etc/hosts << EOF
192.168.117.131  k8s-master
192.168.117.132  k8s-node-1
192.168.117.133  k8s-node-2
192.168.117.134  nfs
EOF

1.2 系统优化

# Relax SELinux and turn the firewall off.
# SELinux is switched to permissive mode (immediately with setenforce, and
# persistently in the config file). This is the setting the kubeadm
# installation guide uses; the pattern is anchored so only the SELINUX=
# setting line is rewritten.
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
# Stop firewalld now and keep it from starting at boot.
systemctl disable --now firewalld

# Turn swap off now, and comment out the swap entries in /etc/fstab so it
# stays off after a reboot. Only active (non-comment) lines whose fields
# include "swap" are touched, instead of deleting every line that merely
# contains the word; the previous file is kept as /etc/fstab.bak.
swapoff -a && sed -r -i.bak '/^[^#].*[[:space:]]swap[[:space:]]/s/^/#/' /etc/fstab

# Kernel prerequisites for container networking.
# 1) Register the required modules in modules-load.d (tee also echoes the
#    written content to the terminal).
tee /etc/modules-load.d/k8s.conf <<EOF
overlay
br_netfilter
EOF
# 2) Load them right away; br_netfilter is only loaded if overlay succeeded.
modprobe overlay \
  && modprobe br_netfilter

# 3) Write the bridge / IP-forwarding sysctl settings and apply them.
tee /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF
sysctl --system

2. Containerd 运行时部署(K8s节点:131-133)

# Install containerd 2.2.1 from the Docker CE repository (Aliyun mirror).
dnf install -y dnf-plugins-core yum-utils
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
dnf install -y containerd.io-2.2.1

# Generate the default configuration, then adjust it.
mkdir -p /etc/containerd
containerd config default | tee /etc/containerd/config.toml
# Use the systemd cgroup driver.
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
# Point the sandbox (pause) image at the Aliyun mirror, version 3.10.1.
# The pattern matches whatever tag the generated config contains: a literal
# "pause:3.10" pattern would turn an existing "pause:3.10.1" into
# "pause:3.10.1.1" and would not match any other tag at all.
sed -i 's|registry\.k8s\.io/pause:[0-9.]*|registry.aliyuncs.com/google_containers/pause:3.10.1|g' /etc/containerd/config.toml

# Restart with the new configuration and enable the service at boot.
systemctl restart containerd && systemctl enable containerd

3. K8s 集群部署(K8s节点:131-133)

3.1 安装组件

# Register the Kubernetes v1.35 RPM repository (tee also echoes the file
# content to the terminal).
tee /etc/yum.repos.d/kubernetes.repo <<EOF
[kubernetes]
name=Kubernetes
baseurl=https://pkgs.k8s.io/core:/stable:/v1.35/rpm/
enabled=1
gpgcheck=1
gpgkey=https://pkgs.k8s.io/core:/stable:/v1.35/rpm/repodata/repomd.xml.key
EOF

# Install the three components pinned to 1.35.0, ignoring any "exclude"
# rules configured for the kubernetes repository.
dnf install -y \
  kubelet-1.35.0 \
  kubeadm-1.35.0 \
  kubectl-1.35.0 \
  --disableexcludes=kubernetes
# Enable kubelet at boot (it is not started here).
systemctl enable kubelet

3.2 Master 初始化(131节点)

# Pre-pull the control-plane images from the Aliyun mirror.
kubeadm config images pull --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.35.0

# Initialise the cluster. The pod CIDR given here must match the one
# configured for the network plugin.
kubeadm init \
--apiserver-advertise-address=192.168.117.131 \
--kubernetes-version=v1.35.0 \
--image-repository=registry.aliyuncs.com/google_containers \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16

# Configure kubectl for the current user: copy the admin kubeconfig and hand
# it to the user's uid and primary gid (the group must come from `id -g`;
# using `id -u` for both assigns an unrelated or non-existent group whenever
# uid and gid differ).
mkdir -p "$HOME/.kube" \
  && cp -i /etc/kubernetes/admin.conf "$HOME/.kube/config" \
  && chown "$(id -u):$(id -g)" "$HOME/.kube/config"

3.3 Node 加入(132、133节点)

# On the master: create a bootstrap token and print the matching join command.
kubeadm token create --print-join-command

# On each worker node: run the command printed above. The token and hash
# shown here are placeholders, not real values.
kubeadm join 192.168.117.131:6443 --token xxx --discovery-token-ca-cert-hash sha256:xxx

4. Calico 网络插件部署

4.1 所有 K8s 节点(131-133)预拉取镜像

# Pre-pull the Calico v3.29.0 images from the Huawei Cloud mirror and tag
# them with the docker.io names.
# The images are pulled directly into containerd's "k8s.io" namespace, which
# removes the second copy in ctr's default namespace and the export/import
# round trip that was needed to move them across namespaces.
# --force lets the tag step be repeated when the target name already exists.
# NOTE(review): confirm that Calico v3.29.0 is supported on Kubernetes 1.35.
mirror=swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io
for image in calico/node calico/cni calico/kube-controllers; do
  ctr -n k8s.io images pull "${mirror}/${image}:v3.29.0"
  ctr -n k8s.io images tag --force "${mirror}/${image}:v3.29.0" "docker.io/${image}:v3.29.0"
done

4.2 Master 节点部署

# Download the Calico v3.29.0 manifest.
wget https://raw.githubusercontent.com/projectcalico/calico/v3.29.0/manifests/calico.yaml -O calico.yaml
# Set the pod CIDR to the value passed to kubeadm (--pod-network-cidr).
# In the upstream manifest the CALICO_IPV4POOL_CIDR name/value pair is
# commented out, so replacing only the value edits a comment. The first two
# expressions uncomment the pair (keeping YAML indentation) and set the CIDR;
# the third covers a manifest where the pair is already active.
sed -i \
  -e 's|# - name: CALICO_IPV4POOL_CIDR|- name: CALICO_IPV4POOL_CIDR|' \
  -e 's|#   value: "192.168.0.0/16"|  value: "10.244.0.0/16"|' \
  -e 's|value: "192.168.0.0/16"|value: "10.244.0.0/16"|' \
  calico.yaml
kubectl apply -f calico.yaml

4.3 启用 IPVS

# Switch kube-proxy to IPVS mode without an interactive editor: rewrite the
# "mode" key of the kube-proxy ConfigMap and re-apply it. The pattern is
# anchored on the key name so that only the line starting with "mode:" is
# changed, whatever its current value.
# NOTE(review): check the Kubernetes 1.35 release notes for the support
# status of kube-proxy's IPVS mode before relying on it.
kubectl get configmap kube-proxy -n kube-system -o yaml | \
sed -e 's/^\([[:space:]]*\)mode:.*/\1mode: "ipvs"/' | \
kubectl apply -f - -n kube-system

# Delete every kube-proxy pod so that it restarts with the new mode.
kubectl delete pods -n kube-system -l k8s-app=kube-proxy

第二部分:Web服务与存储部署

5. MetalLB 负载均衡部署(单 IP 模式)

# Deploy MetalLB.
kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.15.3/config/manifests/metallb-native.yaml

# Enable strictARP in the kube-proxy configuration.
kubectl get configmap kube-proxy -n kube-system -o yaml | \
sed -e "s/strictARP: false/strictARP: true/" | \
kubectl apply -f - -n kube-system

# Wait for the MetalLB pods before creating its custom resources: applying
# them while the admission webhook is still starting fails.
kubectl wait --namespace metallb-system \
  --for=condition=ready pod \
  --selector=app=metallb \
  --timeout=120s

# Address pool with a single address (192.168.117.188) announced via L2.
cat > metallb.yaml << EOF
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: first-pool
  namespace: metallb-system
spec:
  addresses:
  - 192.168.117.188/32   # virtual IP (single-IP mode)
---
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: example
  namespace: metallb-system
spec:
  ipAddressPools:
  - first-pool
EOF

kubectl apply -f metallb.yaml

# Check the MetalLB components and resources.
kubectl get pods -n metallb-system
kubectl get IPaddresspools -n metallb-system
kubectl get L2advertisements -n metallb-system

# Smoke test: a LoadBalancer Service should receive an address from the pool.
kubectl create deployment nginx-test --image=nginx
kubectl expose deployment nginx-test --port=80 --type=LoadBalancer
kubectl get svc nginx-test
# Expected: EXTERNAL-IP shows the pool address, not <pending>.
# Clean up afterwards. The Service must be removed so the only pool address
# becomes free again; the Deployment is removed too so no test pods linger.
kubectl delete svc nginx-test
kubectl delete deployment nginx-test

6. Ingress 控制器部署

# Install the ingress-nginx controller. Everything below runs in the
# "ingress-nginx" namespace, so the separately created "nginx-ingress"
# namespace was never used and is no longer created.
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.15.1/deploy/static/provider/baremetal/deploy.yaml

# Block until the controller pod is Ready (replaces an open-ended `-w` watch
# that has to be interrupted by hand).
kubectl wait --namespace ingress-nginx \
  --for=condition=ready pod \
  --selector=app.kubernetes.io/component=controller \
  --timeout=180s

# The bare-metal manifest exposes the controller through a NodePort Service,
# which MetalLB ignores. Switch it to LoadBalancer so that the pool address
# 192.168.117.188 is assigned to it.
kubectl patch svc ingress-nginx-controller -n ingress-nginx \
  -p '{"spec":{"type":"LoadBalancer"}}'

# Inspect the controller Service.
kubectl get svc -n ingress-nginx ingress-nginx-controller
# Expected: EXTERNAL-IP is 192.168.117.188

7. NFS 服务器部署(134节点)

7.1 服务端配置

# Install the NFS server packages.
yum install -y nfs-utils rpcbind

# Create the shared directory and a test page inside it.
mkdir -p /web/html
echo "<h1>K8s NFS Test</h1>" > /web/html/index.html

# Export the directory to the 192.168.117.0/24 network. This appends to
# /etc/exports, so running it twice duplicates the entry.
# NOTE(review): all_squash and no_root_squash are both set and look
# contradictory - confirm which squash behaviour is intended.
cat >> /etc/exports << EOF
/web/html  192.168.117.0/24(rw,sync,all_squash,no_root_squash)
EOF

# Enable and start the services.
systemctl enable --now nfs-server rpcbind
# (Copy any static site content into /web/html at this point.)
# Re-export everything listed in /etc/exports and show the resulting exports.
exportfs -rv
showmount -e 127.0.0.1

7.2 K8s 节点安装 NFS 客户端(131-133)

# Install the NFS client tools on each Kubernetes node.
yum install -y nfs-utils

# Connectivity test: mount the export, list it, then unmount again.
mount -t nfs 192.168.117.134:/web/html /mnt
ls /mnt
umount /mnt

8. Nginx Web 服务部署(含NFS持久化)

8.1 创建 PV/PVC

# PersistentVolume backed by the NFS export 192.168.117.134:/web/html.
cat > pv.yaml << EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: nfs-pv-html
spec:
  capacity:
    storage: 1Gi
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  nfs:
    path: /web/html
    server: 192.168.117.134
EOF

# PersistentVolumeClaim for that volume.
# storageClassName is set to the empty string so the claim only binds to a
# class-less, statically created PV and is never handed to a default
# StorageClass; volumeName pins it to nfs-pv-html explicitly.
cat > pvc.yaml << EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-pvc-html
spec:
  storageClassName: ""
  volumeName: nfs-pv-html
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
EOF

kubectl apply -f pv.yaml -f pvc.yaml

8.2 部署 Nginx 应用

# Write the Deployment manifest: 10 nginx replicas labelled app=nginx, each
# mounting the claim nfs-pvc-html at /usr/share/nginx/html.
cat > nginx-deployment.yaml << EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-web
spec:
  replicas: 10
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:latest
        ports:
        - containerPort: 80
        volumeMounts:
        - name: nfs-html
          mountPath: /usr/share/nginx/html
      volumes:
      - name: nfs-html
        persistentVolumeClaim:
          claimName: nfs-pvc-html
EOF

# Create (or update) the Deployment from the manifest.
kubectl apply -f nginx-deployment.yaml

8.3 配置 Ingress 访问

# Ingress that routes every path ("/" prefix) to nginx-service:80.
# The ingress class is selected with spec.ingressClassName instead of the
# deprecated kubernetes.io/ingress.class annotation.
cat > nginx-ingress.yaml << EOF
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: web-ingress
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "false"
    nginx.ingress.kubernetes.io/force-ssl-redirect: "false"
spec:
  ingressClassName: nginx
  rules:
  - http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: nginx-service   # must match the Service name below
            port:
              number: 80
EOF

# ClusterIP Service used as the Ingress backend; it selects the pods
# labelled app=nginx.
cat > nginx-service.yaml << EOF
apiVersion: v1
kind: Service
metadata:
  name: nginx-service
spec:
  selector:
    app: nginx
  ports:
  - port: 80
    targetPort: 80
  type: ClusterIP
EOF

# The Service is listed first so it exists when the Ingress is created.
kubectl apply -f nginx-service.yaml -f nginx-ingress.yaml

# Check the Ingress.
kubectl get ingress
# Test: curl http://192.168.117.188/ (should return the index.html stored on NFS)

9.自动扩缩容(HPA)

9.1 安装 Metrics

# Download the metrics-server manifest and apply it.
# NOTE(review): the manifest is fetched from a third-party bucket and its
# content is not visible here - review it before applying.
wget https://xuzhibin-bucket.oss-cn-beijing.aliyuncs.com/k8s/metric-server.yaml
kubectl apply -f metric-server.yaml

9.2 修改 nginx-deployment.yaml

# Add the following to the nginx container entry in nginx-deployment.yaml,
# at the same indentation as its "ports:" key (not directly under the
# top-level "spec:").
        resources:
          limits:
            cpu: 500m
          requests:
            cpu: 200m

9.3 滚动升级

# Apply the edited manifest first: `rollout restart` on its own only restarts
# the pods with the spec already stored in the cluster and does not read
# nginx-deployment.yaml, so the new resources section would never take effect.
kubectl apply -f nginx-deployment.yaml
kubectl rollout restart deploy nginx-web
# Wait until the rollout has finished.
kubectl rollout status deploy nginx-web

9.4 创建 HPA

# Create an HPA for the nginx-web Deployment: target 50% CPU utilisation,
# between 2 and 10 replicas. (The Deployment is named "nginx-web", and the
# CPU flag is --cpu; "nginx:web" and "--cup" were typos.)
kubectl autoscale deploy nginx-web --cpu=50% --min=2 --max=10

9.5 监听 HPA 状态

# Watch the HPA. It carries the name of the Deployment it was created for,
# i.e. "nginx-web" (the same name the delete step uses).
kubectl get hpa nginx-web --watch

9.6 压力测试

# Generate load from a temporary busybox pod by requesting the nginx Service
# in a tight loop. The URL must be the ClusterIP Service name
# ("nginx-service"); "nginx:web" is not a resolvable host/port.
kubectl run -i --tty load-generator --rm --image=busybox:1.28 --restart=Never -- /bin/sh -c "while sleep 0.01; do wget -q -O- http://nginx-service; done"

9.7 删除 HPA

# Remove the HorizontalPodAutoscaler named nginx-web.
kubectl delete hpa nginx-web

10. 监控体系部署(Prometheus+Grafana)

10.1 前置准备

# Install openssl (used to verify the integrity of the downloaded package).
yum install -y openssl

# Install Helm. The installer is downloaded to a file and only executed when
# the download succeeded: with -f curl fails on an HTTP error instead of
# handing an error page to bash.
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
  && chmod 700 get_helm.sh \
  && ./get_helm.sh

# Verify the Helm installation.
helm version

# Add the prometheus-community chart repository and refresh the index.
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update

# Create the namespace for the monitoring stack.
kubectl create namespace monitoring

10.2 配置 Kubernetes 组件监控(关键步骤)

默认情况下,kube-scheduler、kube-controller-manager、kube-proxy、etcd 等核心组件监听 127.0.0.1,Prometheus 无法直接访问。需要将 bind-address 从 127.0.0.1 修改为 0.0.0.0。

在 Master 节点执行
# Back up the static-pod manifests OUTSIDE /etc/kubernetes/manifests: the
# kubelet scans that directory for pod definitions, so a *.bak copy left next
# to the original can be picked up as a second definition of the same pod.
# cp -n keeps the first (pristine) backup when the step is repeated.
backup_dir=/etc/kubernetes/manifests-backup
sudo mkdir -p "$backup_dir"

# ========== kube-scheduler ==========
scheduler=/etc/kubernetes/manifests/kube-scheduler.yaml
sudo cp -n "$scheduler" "$backup_dir/"
sudo sed -i 's/--bind-address=127.0.0.1/--bind-address=0.0.0.0/' "$scheduler"
# Append the extra flags only once, so re-running does not duplicate them.
if ! sudo grep -q -- '--authorization-always-allow-paths' "$scheduler"; then
  sudo sed -i '/--bind-address=0.0.0.0/a\    - --secure-port=10259\n    - --authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics' "$scheduler"
fi

# ========== kube-controller-manager ==========
controller_manager=/etc/kubernetes/manifests/kube-controller-manager.yaml
sudo cp -n "$controller_manager" "$backup_dir/"
sudo sed -i 's/--bind-address=127.0.0.1/--bind-address=0.0.0.0/' "$controller_manager"
if ! sudo grep -q -- '--authorization-always-allow-paths' "$controller_manager"; then
  sudo sed -i '/--bind-address=0.0.0.0/a\    - --secure-port=10257\n    - --authorization-always-allow-paths=/healthz,/readyz,/livez,/metrics' "$controller_manager"
fi

# ========== etcd ==========
etcd=/etc/kubernetes/manifests/etcd.yaml
sudo cp -n "$etcd" "$backup_dir/"
# Rewrite an existing --listen-metrics-urls flag in place; append one only
# when the manifest has none. Appending unconditionally leaves two
# conflicting copies of the flag in the manifest.
if sudo grep -q -- '--listen-metrics-urls=' "$etcd"; then
  sudo sed -i 's|--listen-metrics-urls=.*|--listen-metrics-urls=http://0.0.0.0:2381|' "$etcd"
else
  sudo sed -i '/- --listen-peer-urls=/a\    - --listen-metrics-urls=http://0.0.0.0:2381' "$etcd"
fi

# Restart kubelet.
sudo systemctl restart kubelet
在 Master 节点执行(kubectl 只在 Master 上配置;kube-proxy 的 ConfigMap 修改后对所有节点生效)
# ========== kube-proxy ==========
# Make the metrics endpoint listen on 0.0.0.0:10249. Both spellings of the
# current value (empty string, or 127.0.0.1:10249) are handled, in that order.
kubectl get configmap kube-proxy -n kube-system -o yaml \
  | sed -e 's/metricsBindAddress: ""/metricsBindAddress: "0.0.0.0:10249"/' \
        -e 's/metricsBindAddress: 127.0.0.1:10249/metricsBindAddress: 0.0.0.0:10249/' \
  | kubectl apply -f -

# Delete the kube-proxy pods so they are recreated with the new setting.
kubectl delete pod -n kube-system -l k8s-app=kube-proxy

# Give the new pods a moment to start, then list them.
sleep 10
kubectl get pods -l k8s-app=kube-proxy -n kube-system
验证所有组件
# List the scheduler, controller-manager and etcd pods.
kubectl get pods -n kube-system | grep -E "scheduler|controller-manager|etcd"

# List the kube-proxy pods with node placement (expect Running on every node).
kubectl get pods -n kube-system -l k8s-app=kube-proxy -o wide

10.3 创建 Helm 自定义配置

创建 prometheus-values.yaml 
cat > prometheus-values.yaml << 'EOF'
# ==========================================
# Grafana
# ==========================================
grafana:
  # Exposed through a NodePort. The MetalLB pool contains a single address,
  # so additional LoadBalancer Services cannot all obtain one; a Service
  # stuck in <pending> makes `helm install --wait` time out.
  service:
    type: NodePort
    nodePort: 31356
  # Admin password. The chart key is `adminPassword`; a nested
  # `admin.password` key is not read by the chart.
  # Lab value only - change it for anything else.
  adminPassword: admin123
  # NOTE: the claim created here needs a StorageClass that can provision it.
  # Without one the pod stays Pending; set enabled to false in that case.
  persistence:
    enabled: true
    size: 10Gi

# ==========================================
# Prometheus
# ==========================================
prometheus:
  service:
    type: NodePort
    nodePort: 30090
  prometheusSpec:
    retention: 15d
    # NOTE: same StorageClass requirement as the Grafana claim above.
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 50Gi

# ==========================================
# Kubernetes core component monitoring
# ==========================================

# kube-scheduler
kubeScheduler:
  enabled: true
  service:
    port: 10259
    targetPort: 10259
  serviceMonitor:
    enabled: true
    https: true
    insecureSkipVerify: true

# kube-controller-manager
kubeControllerManager:
  enabled: true
  service:
    port: 10257
    targetPort: 10257
  serviceMonitor:
    enabled: true
    https: true
    insecureSkipVerify: true

# kube-proxy
kubeProxy:
  enabled: true

# etcd
kubeEtcd:
  enabled: true
  service:
    port: 2381
    targetPort: 2381
  serviceMonitor:
    enabled: true
    scheme: http

# kubelet
kubelet:
  enabled: true
  serviceMonitor:
    enabled: true
    https: true
    insecureSkipVerify: true

# Alertmanager
alertmanager:
  enabled: true
EOF

10.4 部署 Prometheus + Grafana

# Install the stack with the custom values; --wait blocks until the release's
# resources are ready.
helm install prometheus prometheus-community/kube-prometheus-stack \
  -n monitoring \
  -f prometheus-values.yaml \
  --wait

# If the release is already installed, upgrade it instead:
# helm upgrade prometheus prometheus-community/kube-prometheus-stack \
#   -n monitoring \
#   -f prometheus-values.yaml \
#   --wait

10.5 验证部署

# Check the monitoring pods (all of them should be Running).
kubectl get pods -n monitoring

# Show the Grafana and Prometheus Services and their node ports.
kubectl get svc -n monitoring | grep -E "prometheus|grafana"

# Expected output (example):
# prometheus-grafana                      NodePort   10.96.123.45   <none>   80:31356/TCP
# prometheus-kube-prometheus-prometheus   NodePort   10.96.123.46   <none>   9090:30090/TCP

10.6 访问监控界面

# Read the Grafana admin password from the prometheus-grafana Secret and
# decode it from base64 (the trailing echo adds a newline). The login user
# defaults to "admin".
kubectl get secret -n monitoring prometheus-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo

访问地址:

  • Grafana: http://k8s节点IP:31356(用户名:admin,密码:上一步获取的密码)

  • Prometheus: http://k8s节点IP:30090

10.7 导入 Grafana 仪表盘

  1. 登录 Grafana 后,左侧菜单 → Dashboards → Import

  2. 推荐导入以下官方仪表盘:

    • 3119 - Kubernetes cluster monitoring(集群监控)

    • 6417 - Kubernetes Pods(Pod 监控)

    • 315 - Kubernetes Node Exporter(节点监控)


项目总结

本文档完成了4节点测试级K8s集群的完整搭建:

  1. 基础设施层:CentOS Stream 10 + Containerd 2.2.1 + Calico v3.29.0 网络

  2. 流量管理层:MetalLB单VIP模式(192.168.117.188)+ Ingress Nginx 七层路由

  3. 应用部署层:Nginx 弹性 Pod + NFS 持久化存储(独立节点134)

  4. 可观测性层:Prometheus + Grafana 全链路监控

Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐