Shell:进程守护脚本

加入 cron job,每 5 分钟运行一次;若进程掉线,就运行对应目录下的 start.sh;重启失败时调用 Python 告警脚本。

#!/bin/bash
# Base directory that holds one sub-directory per exporter.
exporter_dir='/apps/svr/prometheus/exporters'
# Names of the exporters to watch; each name is both the process pattern
# and the sub-directory name under exporter_dir.
exporters=(
    kafka_exporter
    mysqld_exporter
    node_exporter
    redis_exporter
    zookeeper_exporter
)
# Report whether a process matching the given exporter name exists.
# Arguments: $1 - pattern passed to `pgrep -f`, so it is matched against
#                 each process's full command line, not only its name
# Outputs:   nothing on stdout (pgrep's PID list is discarded)
# Returns:   pgrep's exit status: 0 if at least one process matched,
#            non-zero otherwise
check_process() {
    local pattern="$1"
    # The function's status is that of its last command, i.e. pgrep.
    pgrep -f "$pattern" > /dev/null
}
#######################################
# Launch an exporter's start script in the background.
# Globals:   exporter_dir (read) - base directory of the exporters
# Arguments: $1 - exporter name; the script that gets run is
#                 "$exporter_dir/$1/start.sh"
# Outputs:   "Starting ..." on stdout when the script is launched;
#            an error message on stderr when it cannot be launched.
#            The script's own stdout and stderr are written to
#            "$exporter_dir/$1.log", which is overwritten on each launch.
# Returns:   0 once the script has been put in the background (this says
#            nothing about whether the exporter actually came up);
#            1 if the start script is missing, not a regular file, or not
#            executable.
#######################################
start_exporter() {
    local exporter_name=$1
    local start_script="$exporter_dir/$exporter_name/start.sh"

    # Bail out early with a failure status so callers can tell that nothing
    # was launched; -f keeps a directory named start.sh from passing -x.
    if [[ ! -f "$start_script" || ! -x "$start_script" ]]; then
        echo "Start script for $exporter_name not found or not executable" >&2
        return 1
    fi

    echo "Starting $exporter_name..."
    # nohup + & detaches the script so it keeps running after this shell exits.
    nohup "$start_script" > "$exporter_dir/$exporter_name.log" 2>&1 &
}
#######################################
# Check every exporter and restart the ones that are not running.
# Globals:   exporters (read) - array of exporter names to check
#            RESTART_GRACE_SECONDS (read, optional) - seconds to wait after
#            launching a start script before re-checking; defaults to 3
# Arguments: none
# Outputs:   one status line per exporter on stdout, plus restart progress
#            and the restart result for exporters that were down
# Returns:   0
#######################################
main() {
    local exporter
    local grace_seconds="${RESTART_GRACE_SECONDS:-3}"

    for exporter in "${exporters[@]}"; do
        if check_process "$exporter"; then
            echo "$exporter is running."
            continue
        fi

        echo "$exporter is not running. Restarting..."
        # The start script is launched in the background, so re-checking
        # right away would race with process start-up. Wait a short grace
        # period first, but only if something was actually launched.
        if start_exporter "$exporter"; then
            sleep "$grace_seconds"
        fi

        if check_process "$exporter"; then
            echo "$exporter restart successful!"
        else
            echo "$exporter restart failed!"
            # TODO: call the Python alert script here.
        fi
    done
}

main "$@"
# Example crontab entry (runs this script every 5 minutes):
# */5 * * * * /path/to/check_exporters.sh >> /path/to/check_exporters.log 2>&1