在 `backend` 目录下新增 `诊断负载.sh` 脚本,提供系统负载、CPU、内存使用情况及数据库连接数的快速诊断功能。新增文档 `负载问题排查与快速降负载指南.md`,详细说明负载诊断步骤、常见原因及解决方法,帮助用户有效管理系统负载。此改动提升了系统监控能力与用户支持。
91 lines
3.0 KiB
Bash
Executable File
91 lines
3.0 KiB
Bash
Executable File
#!/bin/bash
# Quick diagnosis of system load problems.

# Title line followed by one blank line.
printf '%s\n\n' "=== 系统负载诊断工具 ==="

# 1. Current load averages
printf '%s\n' "📊 当前负载情况:"
uptime
printf '\n'

# 2. CPU and memory summary: first five lines of one batch-mode top iteration
printf '%s\n' "💻 CPU 和内存使用:"
top -b -n 1 | head -n 5
printf '\n'

# 3. Processes using the most CPU
#
# format_ps_rows
#   Reads `ps aux` output on stdin and prints the USER, PID, %CPU, %MEM and
#   first COMMAND word of every row as aligned columns.
#   The first row is treated as the ps header and printed unchanged; data
#   rows get a "%" suffix on the CPU and MEM values. (Appending "%" to every
#   row turned the header into "%CPU%" / "%MEM%".)
format_ps_rows() {
  awk '
    NR == 1 { printf "%-8s %-6s %-6s %-6s %s\n", $1, $2, $3, $4, $11; next }
            { printf "%-8s %-6s %-6s %-6s %s\n", $1, $2, $3 "%", $4 "%", $11 }
  '
}

echo "🔥 CPU 占用最高的进程(前 10):"
# 11 lines = the ps header plus the ten busiest processes.
ps aux --sort=-%cpu | head -n 11 | format_ps_rows
echo ""

# 4. Python processes (trading service)
printf '%s\n' "🐍 Python 进程(交易服务):"
# Keep python/uvicorn command lines; the second grep removes the grep
# process itself from the listing.
ps aux \
  | grep -E "python.*trading|python.*main|uvicorn" \
  | grep -v grep \
  | awk '{
      printf "PID: %-6s CPU: %-5s MEM: %-5s CMD: %s\n",
        $2, $3 "%", $4 "%", $11 " " $12 " " $13 " " $14
    }'
printf '\n'

# 5. Check whether a sync operation is running
#
# print_recent_sync_lines LOG_FILE
#   Looks at the last 100 lines of LOG_FILE and prints up to the last 10
#   lines that mention a sync (case-insensitive). Prints a fallback message
#   when the file is missing or when no line matches.
#   Arguments: $1 - path of the log file to inspect
#   Outputs:   report text on stdout
#   Returns:   always 0
print_recent_sync_lines() {
  local log_file=$1
  local matches

  if [[ ! -f "$log_file" ]]; then
    echo " 日志文件不存在"
    return 0
  fi

  echo "最近的同步日志(最后 10 行):"
  # Capture the matches first. The exit status of `grep | tail` is tail's,
  # which is 0 even when grep found nothing, so an `|| echo` fallback
  # attached to that pipeline never runs.
  matches=$(
    tail -n 100 -- "$log_file" \
      | grep -i "同步\|sync.*binance\|sync_trades" \
      | tail -n 10
  )
  if [[ -n "$matches" ]]; then
    printf '%s\n' "$matches"
  else
    echo " 未找到同步日志"
  fi
}

echo "🔄 检查同步操作:"
print_recent_sync_lines "logs/api.log"
echo ""

# 6. Database connections
#
# print_db_processlist
#   Prints the first 20 lines of the MySQL `SHOW PROCESSLIST` output, or a
#   message explaining why it could not be obtained.
#   Environment:
#     DB_HOST - server host (default: localhost)
#     DB_USER - login user  (default: root)
#     DB_PASS - password    (default: empty, meaning no -p option is passed)
#   Outputs: report text on stdout
#   Returns: always 0
print_db_processlist() {
  local db_host="${DB_HOST:-localhost}"
  local db_user="${DB_USER:-root}"
  local db_pass="${DB_PASS:-}"
  local -a mysql_args
  local process_list

  if ! command -v mysql >/dev/null 2>&1; then
    echo " mysql 客户端未安装"
    return 0
  fi

  # The statement is run without selecting a database, so no database name
  # is passed to the client.
  mysql_args=(-h"$db_host" -u"$db_user")
  if [[ -n "$db_pass" ]]; then
    # NOTE(review): a password given with -p on the command line can be seen
    # by other users via `ps`; consider a client option file instead.
    mysql_args+=(-p"$db_pass")
  fi

  # Test mysql's own exit status. In `mysql ... | head || echo ...` the
  # pipeline status is head's (0 on empty input), so the failure message
  # could never be printed.
  if process_list=$(mysql "${mysql_args[@]}" -e "SHOW PROCESSLIST;" 2>/dev/null); then
    printf '%s\n' "$process_list" | head -n 20
  elif [[ -n "$db_pass" ]]; then
    echo " 无法连接数据库"
  else
    echo " 无法连接数据库(需要配置 DB_PASS)"
  fi
}

echo "🗄️ 数据库连接数:"
print_db_processlist
echo ""

# 7. Detailed memory usage
printf '%s\n' "💾 内存使用详情:"
free -h
printf '\n'

# 8. Look for heavy I/O wait: five vmstat reports at a one-second interval
printf '%s\n' "📈 I/O 和系统状态(5秒采样):"
vmstat 1 5
printf '\n'

# 9. Recent errors in the service logs
#
# print_recent_log_matches PATTERN FILE...
#   Scans the last 200 lines of each FILE for PATTERN (grep basic regex,
#   case-insensitive) and prints at most the last 5 matching lines.
#   Arguments: $1 - grep pattern; remaining arguments - log files to scan
#   Outputs:   matching lines on stdout
#   Returns:   0 if at least one line was printed, 1 if nothing matched
print_recent_log_matches() {
  local pattern=$1
  shift
  local log_file matches

  # Capture before printing: the exit status of `grep | tail` is tail's,
  # which is 0 even with no match, so callers could not detect "no hits"
  # from the pipeline itself.
  matches=$(
    for log_file in "$@"; do
      tail -n 200 -- "$log_file" 2>/dev/null
    done | grep -i -- "$pattern" | tail -n 5
  )
  [[ -n "$matches" ]] || return 1
  printf '%s\n' "$matches"
}

echo "⚠️ 最近的错误日志(最后 5 条):"
if [[ -f "logs/api.log" ]]; then
  print_recent_log_matches "error\|exception\|failed\|timeout" "logs/api.log" \
    || echo " 未找到错误日志"
fi

# Expand the glob unquoted into an array. A quoted glob inside `[ -f ... ]`
# is tested as a literal file name and never matches the rotated logs. When
# nothing matches, the pattern stays literal and the -f test rejects it.
trading_logs=(../trading_system/logs/trading_*.log)
if [[ -f "${trading_logs[0]}" ]]; then
  print_recent_log_matches "error\|exception\|failed" "${trading_logs[@]}" \
    || echo ""
fi
echo ""

# Closing banner and troubleshooting hints, one argument per output line
# (empty arguments produce the blank separator lines).
printf '%s\n' \
  "=== 诊断完成 ===" \
  "" \
  "💡 建议:" \
  " 1. 如果 CPU 占用高的是 Python 进程,可能是:" \
  " - 市场扫描正在运行(计算技术指标)" \
  " - 订单同步正在运行(从币安拉取大量订单)" \
  " - 数据库查询慢(检查慢查询日志)" \
  "" \
  " 2. 如果内存占用高,检查是否有内存泄漏" \
  "" \
  " 3. 如果 I/O 等待高,可能是数据库查询慢或磁盘慢" \
  "" \
  " 4. 临时降负载方法:" \
  " - 暂停市场扫描(在配置中设置 SCAN_ENABLED=False)" \
  " - 等待同步操作完成(不要手动取消)" \
  " - 重启交易服务(如果进程异常)"