feat(scripts): add Kubernetes cluster and Signoz configuration files and install scripts

- Add two flannel CNI manifests with complete resource definitions:
  Namespace, ServiceAccount, ConfigMap, DaemonSet, and RBAC
- Add a manual Signoz ClickHouse initialization script that creates the
  databases, runs the schema migration, and verifies the result, with
  interactive use and log inspection
- Add a K3s high-availability cluster install script supporting server and
  agent modes, with argument parsing, system preparation, registry mirror
  setup, and the install flow
- Add a Signoz ClickHouseInstallation custom-resource patch that installs
  the histogram-quantile plugin from a local copy
- Add a Signoz values file that points every persistent component at the
  standard-storage StorageClass
- Add a standard-storage StorageClass manifest backed by
  rancher.io/local-path, with volume expansion enabled
danial · 2025-12-18 16:43:04 +08:00 · parent 5a88d8e0d6 · commit 467ccffb3e · 9 changed files with 821 additions and 0 deletions


@@ -0,0 +1,186 @@
---
apiVersion: v1
kind: Namespace
metadata:
  name: kube-flannel
  labels:
    pod-security.kubernetes.io/enforce: privileged
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.42.0.0/16",
      "Backend": {
        "Type": "vxlan"
      },
      "EnableIPv4": true,
      "EnableIPv6": false
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni-plugin
        image: docker.m.daocloud.io/flannel/flannel-cni-plugin:v0.27.4-flannel1
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      - name: install-cni
        image: docker.m.daocloud.io/flannel/flannel:v0.27.4
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: docker.m.daocloud.io/flannel/flannel:v0.27.4
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
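
After applying either flannel manifest, a minimal rollout check, assuming kubectl is pointed at the target cluster (the commands are illustrative, not part of the commit):

# Wait for the DaemonSet to converge and spot-check the pods
kubectl -n kube-flannel rollout status ds/kube-flannel-ds
kubectl -n kube-flannel get pods -o wide
# Nodes should report Ready once the CNI config is in place
kubectl get nodes -o wide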


@@ -0,0 +1,199 @@
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: psp.flannel.unprivileged
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*'
spec:
  privileged: true
  allowPrivilegeEscalation: true
  allowedCapabilities:
  - '*'
  volumes:
  - '*'
  hostNetwork: true
  hostPorts:
  - min: 0
    max: 65535
  hostIPC: true
  hostPID: true
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.42.0.0/16",
      "Backend": {
        "Type": "vxlan"
      },
      "EnableIPv4": true,
      "EnableIPv6": false
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni-plugin
        image: docker.m.daocloud.io/flannel/flannel-cni-plugin:v0.27.4-flannel1
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      - name: install-cni
        image: docker.m.daocloud.io/flannel/flannel:v0.27.4
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: docker.m.daocloud.io/flannel/flannel:v0.27.4
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
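
This variant still ships the legacy PodSecurityPolicy object, which only applies on clusters that serve policy/v1beta1 (PSP was removed in Kubernetes 1.25). A small pre-check before applying it, assuming kubectl access; illustrative only:

# If the legacy PSP API is not served, apply the pod-security.kubernetes.io variant above instead
kubectl api-resources --api-group=policy | grep podsecuritypolicies \
  && echo "legacy PSP API available" \
  || echo "PSP not served on this cluster - use the first manifest"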


@@ -0,0 +1,30 @@
apiVersion: clickhouse.altinity.com/v1
kind: ClickHouseInstallation
metadata:
  name: signoz-clickhouse
  namespace: signoz
spec:
  templates:
    podTemplates:
    - name: clickhouse
      spec:
        initContainers:
        - name: signoz-clickhouse-udf-init
          command:
          - /bin/sh
          - -c
          - |
            set -e
            # Copy histogram-quantile from local storage instead of downloading
            mkdir -p /var/lib/clickhouse/user_scripts
            cp /mnt/histogram/histogram-quantile /var/lib/clickhouse/user_scripts/histogramQuantile
            chmod +x /var/lib/clickhouse/user_scripts/histogramQuantile
            echo "histogram-quantile installed successfully from local copy"
          volumeMounts:
          - name: histogram-volume
            mountPath: /mnt/histogram
        volumes:
        - name: histogram-volume
          hostPath:
            path: /kube/storage
            type: Directory
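
The patch assumes the histogram-quantile binary is already present under /kube/storage on the node backing the hostPath volume. A quick pre-flight and post-reconcile check, reusing the pod name from the init script further down (adjust it if the installation name differs):

# Pre-flight, on the node that will host the ClickHouse pod
ls -l /kube/storage/histogram-quantile
# After the operator reconciles, confirm the UDF landed inside the pod
kubectl exec -n signoz chi-signoz-clickhouse-cluster-0-0-0 -- \
    ls -l /var/lib/clickhouse/user_scripts/histogramQuantile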


@@ -0,0 +1,46 @@
# Signoz Configuration with New StorageClass
# This configures Signoz to use the standard-storage StorageClass

# Global configuration
global:
  storageClass: "standard-storage"
  clusterName: "k8s-cluster"

# ClickHouse configuration with new storage class
clickhouse:
  enabled: true
  persistence:
    enabled: true
    storageClass: "standard-storage"
    size: 20Gi
    accessModes:
      - ReadWriteOnce

# ZooKeeper configuration with new storage class
zookeeper:
  enabled: true
  persistence:
    enabled: true
    storageClass: "standard-storage"
    size: 8Gi
    accessModes:
      - ReadWriteOnce

# Signoz main application with new storage class
signoz:
  persistence:
    enabled: true
    storageClass: "standard-storage"
    size: 5Gi
    accessModes:
      - ReadWriteOnce

# Other components
alertmanager:
  enabled: true
otelCollector:
  enabled: true
queryService:
  enabled: true
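
These values are meant to be passed to the SigNoz Helm chart, and the keys must match the chart version in use. A sketch of how the file might be applied, assuming the official signoz Helm repository and a hypothetical file name signoz-values-new-storage.yaml:

# Hypothetical file name; point -f at wherever this values file lives in the repo
helm repo add signoz https://charts.signoz.io
helm repo update
helm upgrade --install signoz signoz/signoz \
    --namespace signoz --create-namespace \
    -f signoz-values-new-storage.yaml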


@@ -0,0 +1,12 @@
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: standard-storage
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
provisioner: rancher.io/local-path
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
parameters:
  path: "/kube/storage"

scripts/scripts/init-signoz-db.sh (executable file, 141 additions)

@@ -0,0 +1,141 @@
#!/bin/bash
# Manual initialization script for the Signoz ClickHouse databases
set -e

echo "=== Manually initializing the Signoz ClickHouse databases ==="
echo

NAMESPACE="signoz"
CLICKHOUSE_POD="chi-signoz-clickhouse-cluster-0-0-0"

# Check that the ClickHouse pod is running
echo "Step 1: checking ClickHouse pod status..."
if ! kubectl get pod $CLICKHOUSE_POD -n $NAMESPACE &>/dev/null; then
    echo "Error: ClickHouse pod not found: $CLICKHOUSE_POD"
    exit 1
fi
if [[ $(kubectl get pod $CLICKHOUSE_POD -n $NAMESPACE -o jsonpath='{.status.phase}') != "Running" ]]; then
    echo "Error: ClickHouse pod is not running"
    exit 1
fi
echo "✅ ClickHouse pod is running"
echo

# 2. Create the Signoz databases
echo "Step 2: creating the Signoz databases..."
kubectl exec -n $NAMESPACE $CLICKHOUSE_POD -- clickhouse-client --query="
CREATE DATABASE IF NOT EXISTS signoz_metrics;
CREATE DATABASE IF NOT EXISTS signoz_logs;
CREATE DATABASE IF NOT EXISTS signoz_traces;
"
echo "✅ Databases created"
echo

# 3. Remove any existing schema migrator jobs
echo "Step 3: cleaning up existing schema migrator jobs..."
kubectl delete job -n $NAMESPACE signoz-schema-migrator-sync &>/dev/null || true
kubectl delete job -n $NAMESPACE signoz-schema-migrator-async-init &>/dev/null || true
kubectl delete job -n $NAMESPACE signoz-schema-migrator-sync-init &>/dev/null || true
echo "✅ Old jobs cleaned up"
echo

# 4. Run the schema migration manually
echo "Step 4: running the schema migration..."
# ClickHouse connection details
CLICKHOUSE_HOST="signoz-clickhouse"
CLICKHOUSE_PORT="9000"
CLICKHOUSE_HTTP_PORT="8123"

# Create the schema migrator sync job
cat <<EOF | kubectl apply -f -
apiVersion: batch/v1
kind: Job
metadata:
  name: signoz-schema-migrator-manual-sync
  namespace: signoz
  labels:
    app.kubernetes.io/name: signoz
    app.kubernetes.io/instance: signoz
    app.kubernetes.io/component: schema-migrator
spec:
  template:
    metadata:
      labels:
        app.kubernetes.io/name: signoz
        app.kubernetes.io/instance: signoz
        app.kubernetes.io/component: schema-migrator
    spec:
      restartPolicy: OnFailure
      initContainers:
      - name: wait-for-clickhouse
        image: busybox:1.35
        command:
        - sh
        - -c
        - |
          echo "Waiting for ClickHouse to come up..."
          until wget --spider -q http://$CLICKHOUSE_HOST:$CLICKHOUSE_HTTP_PORT/ping; do
            echo "ClickHouse not ready, retrying in 5 seconds..."
            sleep 5
          done
          echo "ClickHouse is ready"
      containers:
      - name: schema-migrator
        image: signoz/signoz-schema-migrator:v0.129.12
        env:
        - name: CLICKHOUSE_HOST
          value: $CLICKHOUSE_HOST
        - name: CLICKHOUSE_PORT
          value: "$CLICKHOUSE_PORT"
        - name: CLICKHOUSE_HTTP_PORT
          value: "$CLICKHOUSE_HTTP_PORT"
        - name: CLICKHOUSE_CLUSTER
          value: "cluster"
        - name: CLICKHOUSE_USER
          value: "admin"
        - name: CLICKHOUSE_PASSWORD
          value: "27ff0399-0d3a-4bd8-919d-17c2181e6fb9"
        - name: CLICKHOUSE_SECURE
          value: "false"
        args:
        - sync
        - "--cluster-name"
        - "\$(CLICKHOUSE_CLUSTER)"
        - "--dsn"
        - "tcp://\$(CLICKHOUSE_USER):\$(CLICKHOUSE_PASSWORD)@signoz-clickhouse:\$(CLICKHOUSE_PORT)"
        - "--up="
EOF
echo "✅ Schema migrator 作业已创建"
echo
# 5. 等待作业完成
echo "步骤 5: 等待 schema migration 完成..."
kubectl wait --for=condition=complete job/signoz-schema-migrator-manual-sync -n $NAMESPACE --timeout=300s
echo "✅ Schema migration 完成"
echo
# 6. 验证数据库
echo "步骤 6: 验证数据库结构..."
kubectl exec -n $NAMESPACE $CLICKHOUSE_POD -- clickhouse-client --query="SHOW DATABASES" | grep -E "(signoz_metrics|signoz_logs|signoz_traces)"
echo
echo "检查表结构:"
kubectl exec -n $NAMESPACE $CLICKHOUSE_POD -- clickhouse-client --query="SHOW TABLES FROM signoz_metrics" | head -5
kubectl exec -n $NAMESPACE $CLICKHOUSE_POD -- clickhouse-client --query="SHOW TABLES FROM signoz_logs" | head -5
echo
echo "=== 数据库初始化完成 ==="
echo
echo "验证 Signoz 服务:"
echo "1. UI 访问: http://10.206.16.5:30018"
echo "2. API 健康检查: curl http://10.206.16.5:30018/api/v1/health"
echo "3. 检查作业日志: kubectl logs -n $NAMESPACE job/signoz-schema-migrator-manual-sync"

scripts/scripts/k3s-install.sh (executable file, 207 additions)

@@ -0,0 +1,207 @@
#!/bin/bash
# K3s high-availability cluster install script - minimal edition
# Supports server (control-plane) and agent (worker) installs using CN registry mirrors
set -e

# Configuration
MODE=""        # server | agent
TOKEN=""       # cluster token
SERVER_URL=""  # server node address
NODE_NAME=""   # node name
NODE_IP=""     # node IP

# CN install mirror
export INSTALL_K3S_MIRROR=cn

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Help
help() {
    echo "K3s high-availability cluster install script"
    echo
    echo "Usage: $0 [options]"
    echo
    echo "Install mode:"
    echo "  -m, --mode MODE    server: control-plane node, agent: worker node"
    echo
    echo "Agent options:"
    echo "  -t, --token TOKEN  server node token"
    echo "  -s, --server URL   server address (e.g. https://10.206.16.5:6443)"
    echo "  -n, --name NAME    node name"
    echo "  -i, --ip IP        node IP"
    echo
    echo "Examples:"
    echo "  # server node"
    echo "  $0 -m server"
    echo
    echo "  # agent node"
    echo "  $0 -m agent -t K10::server -s https://10.206.16.5:6443 -n worker-1 -i 192.168.1.100"
    exit 0
}
# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        -m|--mode) MODE="$2"; shift 2 ;;
        -t|--token) TOKEN="$2"; shift 2 ;;
        -s|--server) SERVER_URL="$2"; shift 2 ;;
        -n|--name) NODE_NAME="$2"; shift 2 ;;
        -i|--ip) NODE_IP="$2"; shift 2 ;;
        -h|--help) help ;;
        *) echo -e "${RED}Error: unknown option $1${NC}"; help ;;
    esac
done

# Validate arguments
[[ -z "$MODE" ]] && { echo -e "${RED}Error: an install mode is required${NC}"; help; exit 1; }
[[ "$MODE" != "server" && "$MODE" != "agent" ]] && { echo -e "${RED}Error: mode must be server or agent${NC}"; exit 1; }
[[ "$MODE" == "agent" && (-z "$TOKEN" || -z "$SERVER_URL") ]] && { echo -e "${RED}Error: agents must provide a token and a server URL${NC}"; exit 1; }

# Require root
[[ $(id -u) -ne 0 ]] && { echo -e "${RED}Error: root privileges are required${NC}"; exit 1; }

echo "=== K3s high-availability cluster install ==="
echo -e "Mode: ${GREEN}$MODE${NC}"
echo -e "Registry mirrors: ${GREEN}CN mirrors (INSTALL_K3S_MIRROR=cn)${NC}"
[[ "$MODE" == "agent" ]] && echo -e "Server: ${GREEN}$SERVER_URL${NC}"
echo
# 1. Remove any previous installation
if command -v k3s &> /dev/null; then
    echo "Removing previous installation..."
    if [[ -x /usr/local/bin/k3s-uninstall.sh ]]; then
        /usr/local/bin/k3s-uninstall.sh
    elif [[ -x /usr/local/bin/k3s-agent-uninstall.sh ]]; then
        /usr/local/bin/k3s-agent-uninstall.sh
    fi
    rm -rf /etc/rancher/k3s/ /var/lib/rancher/k3s/
    echo "✅ Previous installation removed"
fi

# 2. System configuration
echo "Configuring the system..."
# Disable swap
swapoff -a && sed -i '/ swap / s/^/#/' /etc/fstab
# Load kernel modules
cat > /etc/modules-load.d/k8s.conf <<EOF
br_netfilter
overlay
EOF
modprobe br_netfilter overlay
# Kernel parameters
cat > /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
sysctl --system
# Install dependencies
command -v apt &> /dev/null && apt update && apt install -y curl
command -v yum &> /dev/null && yum install -y curl
echo "✅ System configuration complete"
# 3. Configure registry mirrors
echo "Configuring registry mirrors..."
mkdir -p /etc/rancher/k3s/
cat > /etc/rancher/k3s/registries.yaml <<EOF
mirrors:
  registry.local.w7.cc:
  docker.io:
    endpoint:
      - "https://mirror.ccs.tencentyun.com"
      - "https://registry.cn-hangzhou.aliyuncs.com"
      - "https://docker.m.daocloud.io"
      - "https://docker.1panel.live"
  quay.io:
    endpoint:
      - "https://quay.m.daocloud.io"
      - "https://quay.dockerproxy.com"
  gcr.io:
    endpoint:
      - "https://gcr.m.daocloud.io"
      - "https://gcr.dockerproxy.com"
  ghcr.io:
    endpoint:
      - "https://ghcr.m.daocloud.io"
      - "https://ghcr.dockerproxy.com"
  k8s.gcr.io:
    endpoint:
      - "https://k8s-gcr.m.daocloud.io"
      - "https://k8s.dockerproxy.com"
  registry.k8s.io:
    endpoint:
      - "https://k8s.m.daocloud.io"
      - "https://k8s.dockerproxy.com"
  mcr.microsoft.com:
    endpoint:
      - "https://mcr.m.daocloud.io"
      - "https://mcr.dockerproxy.com"
  nvcr.io:
    endpoint:
      - "https://nvcr.m.daocloud.io"
  "*":
EOF
echo "✅ Registry mirrors configured"
# 4. Install k3s
echo "Installing k3s..."
if [[ "$MODE" == "server" ]]; then
    # Server node
    export INSTALL_K3S_EXEC="--disable=traefik --cluster-init"
    curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn sh -

    # Print the join token
    echo
    echo -e "${GREEN}=== Server node installed ===${NC}"
    echo -e "Node token: ${YELLOW}$(cat /var/lib/rancher/k3s/server/node-token)${NC}"
    echo -e "Cluster info: ${YELLOW}kubectl cluster-info${NC}"
else
    # Agent node
    export K3S_URL="$SERVER_URL"
    export K3S_TOKEN="$TOKEN"
    # Build the agent EXEC args (without --disable=traefik)
    AGENT_EXEC=""
    [[ -n "$NODE_NAME" ]] && AGENT_EXEC="$AGENT_EXEC --node-name=$NODE_NAME"
    [[ -n "$NODE_IP" ]] && AGENT_EXEC="$AGENT_EXEC --node-ip=$NODE_IP --node-external-ip=$NODE_IP"
    export INSTALL_K3S_EXEC="$AGENT_EXEC"
    curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn sh -

    echo
    echo -e "${GREEN}=== Agent node installed ===${NC}"
    echo -e "Service status: ${YELLOW}systemctl status k3s-agent${NC}"
fi
# 5. Verify
echo "Verifying the installation..."
sleep 10
if [[ "$MODE" == "agent" ]]; then
    SERVICE_NAME="k3s-agent"
else
    SERVICE_NAME="k3s"
fi
if systemctl is-active --quiet $SERVICE_NAME; then
    echo -e "${GREEN}✅ $SERVICE_NAME is running${NC}"
else
    echo -e "${RED}❌ $SERVICE_NAME failed to start${NC}"
    echo "Logs: journalctl -u $SERVICE_NAME -f"
    exit 1
fi

echo
echo -e "${GREEN}=== Installation successful ===${NC}"
echo
echo "Useful commands:"
if [[ "$MODE" == "server" ]]; then
    echo "  kubectl get nodes -o wide"
    echo "  kubectl get pods -A"
    echo "  sudo systemctl status k3s"
else
    echo "  sudo systemctl status k3s-agent"
    echo "  On the server node: kubectl get nodes"