监控指标
LCGYL Framework 提供全面的监控支持,帮助你了解应用的运行状态和性能表现。
监控配置
基本配置
properties
# application.properties
monitoring.enabled=true
monitoring.metrics.enabled=true
monitoring.health.enabled=true
monitoring.tracing.enabled=true
指标导出
properties
# Prometheus
monitoring.export.prometheus.enabled=true
monitoring.export.prometheus.endpoint=/metrics
# InfluxDB
monitoring.export.influxdb.enabled=true
monitoring.export.influxdb.uri=http://localhost:8086
monitoring.export.influxdb.db=metrics
健康检查
内置健康检查
java
@Component
public class HealthEndpoint {
@Inject
private List<HealthIndicator> healthIndicators;
/**
 * Aggregates every injected {@code HealthIndicator} into a single report.
 *
 * <p>The overall status is {@code Status.UP} only when each indicator reports
 * {@code Status.UP}; otherwise it is {@code Status.DOWN}. An indicator that throws
 * or returns {@code null} counts as unhealthy instead of failing the whole endpoint.
 *
 * @return the overall status plus the per-indicator results keyed by indicator name
 */
@GetMapping("/health")
public Health health() {
    Map<String, Health> details = new HashMap<>();
    boolean healthy = true;
    for (HealthIndicator indicator : healthIndicators) {
        Health health;
        try {
            health = indicator.health();
        } catch (RuntimeException e) {
            // One broken check must not hide the results of the others:
            // report it the same way the sample indicators report their own errors.
            healthy = false;
            health = Health.down()
                    .withDetail("error", e.getMessage())
                    .build();
        }
        details.put(indicator.getName(), health);
        if (health == null || health.getStatus() != Status.UP) {
            healthy = false;
        }
    }
    return Health.builder()
            .status(healthy ? Status.UP : Status.DOWN)
            .details(details)
            .build();
}
}
自定义健康检查
java
/**
 * Health indicator named "database" that probes the injected {@code DataSource}.
 */
@Component
public class DatabaseHealthIndicator implements HealthIndicator {

    @Inject
    private DataSource dataSource;

    /** @return the fixed indicator name, {@code "database"} */
    @Override
    public String getName() {
        return "database";
    }

    /**
     * Opens a connection and validates it with a 5 second timeout.
     *
     * @return an "up" result with connection details when the connection is valid,
     *         a "down" result carrying the error message when an {@code SQLException}
     *         occurs, and a plain "down" result when the connection is not valid
     */
    @Override
    public Health health() {
        try (Connection connection = dataSource.getConnection()) {
            if (!connection.isValid(5)) {
                return Health.down().build();
            }
            return Health.up()
                    .withDetail("database", "MySQL")
                    .withDetail("status", "connected")
                    .build();
        } catch (SQLException sqlError) {
            return Health.down()
                    .withDetail("error", sqlError.getMessage())
                    .build();
        }
    }
}
@Component
public class RedisHealthIndicator implements HealthIndicator {
@Inject
private RedisTemplate<String, Object> redisTemplate;

/** @return the fixed indicator name, {@code "redis"} */
@Override
public String getName() {
    return "redis";
}

/**
 * Sends a PING through the template's connection factory.
 *
 * @return an "up" result when the reply is exactly {@code "PONG"}, a "down" result
 *         carrying the error message when any exception is thrown, and a plain
 *         "down" result for every other reply
 */
@Override
public Health health() {
    try {
        // NOTE(review): the connection obtained here is never closed in this method;
        // confirm whether the framework requires it to be released explicitly.
        String reply = redisTemplate.getConnectionFactory()
                .getConnection()
                .ping();
        boolean pongReceived = "PONG".equals(reply);
        if (pongReceived) {
            return Health.up()
                    .withDetail("status", "connected")
                    .build();
        }
    } catch (Exception failure) {
        return Health.down()
                .withDetail("error", failure.getMessage())
                .build();
    }
    return Health.down().build();
}
}
健康检查响应
json
{
"status": "UP",
"details": {
"database": {
"status": "UP",
"details": {
"database": "MySQL",
"status": "connected"
}
},
"redis": {
"status": "UP",
"details": {
"status": "connected"
}
},
"diskSpace": {
"status": "UP",
"details": {
"total": 500000000000,
"free": 200000000000,
"threshold": 10000000000
}
}
}
}
指标收集
计数器
java
@Component
public class OrderMetrics {
@Inject
private MeterRegistry meterRegistry;

/** Counter registered under "orders.created". */
private Counter orderCreatedCounter;

/** Counter registered under "orders.failed". */
private Counter orderFailedCounter;

/** {@code @PostConstruct} callback that looks up the two untagged counters. */
@PostConstruct
public void init() {
    this.orderCreatedCounter = this.meterRegistry.counter("orders.created");
    this.orderFailedCounter = this.meterRegistry.counter("orders.failed");
}

/** Adds one to the "orders.created" counter. */
public void recordOrderCreated() {
    this.orderCreatedCounter.increment();
}

/** Adds one to the "orders.failed" counter. */
public void recordOrderFailed() {
    this.orderFailedCounter.increment();
}

/**
 * Adds one to the tagged "orders" counter.
 *
 * @param status value for the "status" tag
 * @param type   value for the "type" tag
 */
public void recordOrder(String status, String type) {
    // Counter with tags: looked up on every call with the given tag values.
    Counter taggedCounter = this.meterRegistry.counter("orders",
            "status", status,
            "type", type);
    taggedCounter.increment();
}
}
计时器
java
@Component
public class PerformanceMetrics {
@Inject
private MeterRegistry meterRegistry;
/**
 * Records an already-measured duration on the "api.latency" timer.
 *
 * @param endpoint   value for the "endpoint" tag
 * @param durationMs elapsed time, recorded in milliseconds
 */
public void recordApiLatency(String endpoint, long durationMs) {
meterRegistry.timer("api.latency", "endpoint", endpoint)
.record(durationMs, TimeUnit.MILLISECONDS);
}
// Using a Sample: start it before the work and stop it afterwards.
/**
 * Starts a timing sample against the injected registry.
 *
 * @return the sample to pass to {@link #stopTimer} later
 */
public Timer.Sample startTimer() {
return Timer.start(meterRegistry);
}
/**
 * Stops a sample, recording it on the "api.latency" timer.
 *
 * @param sample   sample previously obtained from {@link #startTimer}
 * @param endpoint value for the "endpoint" tag
 */
public void stopTimer(Timer.Sample sample, String endpoint) {
sample.stop(meterRegistry.timer("api.latency", "endpoint", endpoint));
}
}
仪表盘
java
@Component
public class SystemMetrics {
@Inject
private MeterRegistry meterRegistry;
@Inject
private ConnectionPool connectionPool;
/**
 * {@code @PostConstruct} callback that registers three gauges: active and idle
 * connection counts of the injected pool, and used JVM memory
 * (total memory minus free memory).
 */
@PostConstruct
public void init() {
    // Register the gauges
    meterRegistry.gauge(
            "connection.pool.active",
            connectionPool,
            pool -> pool.getActiveConnections());
    meterRegistry.gauge(
            "connection.pool.idle",
            connectionPool,
            pool -> pool.getIdleConnections());
    meterRegistry.gauge(
            "jvm.memory.used",
            Runtime.getRuntime(),
            runtime -> runtime.totalMemory() - runtime.freeMemory());
}
}
分布摘要
java
@Component
public class RequestMetrics {
@Inject
private MeterRegistry meterRegistry;

/** Distribution summary registered under "request.size". */
private DistributionSummary requestSizeSummary;

/**
 * {@code @PostConstruct} callback that builds and registers the "request.size"
 * summary with base unit "bytes" and the 0.5 / 0.95 / 0.99 percentiles.
 */
@PostConstruct
public void init() {
    this.requestSizeSummary = DistributionSummary
            .builder("request.size")
            .baseUnit("bytes")
            .publishPercentiles(0.5, 0.95, 0.99)
            .register(this.meterRegistry);
}

/**
 * Records one observation on the "request.size" summary.
 *
 * @param bytes the value to record
 */
public void recordRequestSize(long bytes) {
    this.requestSizeSummary.record(bytes);
}
}
注解方式
@Timed
java
@Component
public class UserService {
// NOTE(review): userRepository is not declared in this snippet; presumably it is
// an injected field omitted for brevity. Confirm before copying the example.

/**
 * Looks a user up through the repository; timed as "user.find".
 *
 * @param id identifier passed to the repository lookup
 * @return whatever the repository returns for that id
 */
@Timed(value = "user.find", description = "查找用户耗时")
public User findById(Long id) {
    User found = userRepository.findById(id);
    return found;
}

/**
 * Saves a user through the repository; timed as "user.create" with the
 * 0.5 / 0.95 / 0.99 percentiles.
 *
 * @param user the user to save
 * @return the value returned by the repository's save call
 */
@Timed(value = "user.create", percentiles = {0.5, 0.95, 0.99})
public User create(User user) {
    User saved = userRepository.save(user);
    return saved;
}
}
@Counted
java
@Component
public class OrderService {
/**
 * Saves the order through the repository; counted as "order.created".
 *
 * @param order the order to save
 * @return the value returned by the repository's save call
 */
@Counted(value = "order.created", description = "创建订单次数")
public Order createOrder(Order order) {
    Order saved = orderRepository.save(order);
    return saved;
}
}
链路追踪
配置追踪
properties
monitoring.tracing.enabled=true
monitoring.tracing.sampler.probability=1.0
monitoring.tracing.exporter=zipkin
monitoring.tracing.zipkin.endpoint=http://localhost:9411/api/v2/spans
手动追踪
java
@Component
public class OrderService {
@Inject
private Tracer tracer;
/**
 * Saves the order inside a manually managed "createOrder" span.
 *
 * <p>The span is tagged with the order and user ids up front, marked with
 * status "success" after the save, and on any exception gets an error status
 * plus the recorded exception before the exception is rethrown. The span is
 * always ended.
 *
 * @param order the order to save
 * @return the value returned by the repository's save call
 */
public Order createOrder(Order order) {
    Span orderSpan = tracer.spanBuilder("createOrder")
            .setAttribute("orderId", order.getId())
            .setAttribute("userId", order.getUserId())
            .startSpan();
    try (Scope ignored = orderSpan.makeCurrent()) {
        // Business logic
        Order saved = orderRepository.save(order);
        orderSpan.setAttribute("status", "success");
        return saved;
    } catch (Exception failure) {
        orderSpan.setStatus(StatusCode.ERROR, failure.getMessage());
        orderSpan.recordException(failure);
        throw failure;
    } finally {
        orderSpan.end();
    }
}
}
传播上下文
java
@Component
public class HttpClientInterceptor implements ClientHttpRequestInterceptor {
@Inject
private Tracer tracer;
/**
 * Copies the current span's trace id and span id into the outgoing request
 * headers ("X-Trace-Id", "X-Span-Id") when a current span exists, then
 * continues the execution chain.
 *
 * @param request   outgoing request whose headers are extended
 * @param body      request body, passed through unchanged
 * @param execution the remaining execution chain
 * @return the response produced by the chain
 * @throws IOException if the chain execution fails with an I/O error
 */
@Override
public ClientHttpResponse intercept(HttpRequest request, byte[] body,
        ClientHttpRequestExecution execution) throws IOException {
    // Inject the tracing headers
    final Span activeSpan = tracer.getCurrentSpan();
    final boolean tracingActive = activeSpan != null;
    if (tracingActive) {
        request.getHeaders().add("X-Trace-Id", activeSpan.getTraceId());
        request.getHeaders().add("X-Span-Id", activeSpan.getSpanId());
    }
    return execution.execute(request, body);
}
}
日志集成
结构化日志
java
@Component
public class OrderService {
private static final Logger logger = LoggerFactory.getLogger(OrderService.class);
/**
 * Saves the order and emits structured log entries before the save, after a
 * successful save (including the elapsed time) and on failure.
 *
 * @param order the order to save
 * @return the value returned by the repository's save call
 */
public Order createOrder(Order order) {
    // Monotonic start mark; the original snippet logged an undefined "duration".
    long startNanos = System.nanoTime();
    logger.info("创建订单",
            kv("orderId", order.getId()),
            kv("userId", order.getUserId()),
            kv("amount", order.getTotalAmount()));
    try {
        Order result = orderRepository.save(order);
        // Elapsed time of the save in milliseconds.
        long duration = (System.nanoTime() - startNanos) / 1_000_000L;
        logger.info("订单创建成功",
                kv("orderId", result.getId()),
                kv("duration", duration));
        return result;
    } catch (Exception e) {
        // Pass the exception itself as the last argument so the stack trace is
        // kept, not only its message.
        logger.error("订单创建失败",
                kv("orderId", order.getId()),
                kv("error", e.getMessage()),
                e);
        throw e;
    }
}
}
MDC 上下文
java
@Component
public class RequestFilter implements Filter {
/**
 * Puts a fresh random request id and the current user id into the MDC for the
 * duration of the downstream filter chain, then clears the MDC.
 *
 * @param request  the incoming request, passed down the chain
 * @param response the outgoing response, passed down the chain
 * @param chain    the remaining filter chain
 * @throws IOException      if the chain fails with an I/O error
 * @throws ServletException if the chain fails with a servlet error
 */
@Override
public void doFilter(ServletRequest request, ServletResponse response,
        FilterChain chain) throws IOException, ServletException {
    try {
        // Populate the MDC inside the try block: if resolving the user id (or a
        // put) fails, the finally block still removes the request id from the
        // thread, so nothing leaks into the next request served by it.
        String requestId = UUID.randomUUID().toString();
        MDC.put("requestId", requestId);
        MDC.put("userId", getCurrentUserId());
        chain.doFilter(request, response);
    } finally {
        MDC.clear();
    }
}
}
告警
告警规则
java
@Component
public class AlertService {
@Inject
private MeterRegistry meterRegistry;
@Inject
private NotificationService notificationService;
/**
 * Scheduled check (fixed rate 60000) that sends an alert when the error rate
 * exceeds 0.05 and another when the P99 latency exceeds 1000.
 */
@Scheduled(fixedRate = 60000)
public void checkAlerts() {
    // Error-rate check
    final double errorRate = getErrorRate();
    final boolean errorRateTooHigh = errorRate > 0.05;
    if (errorRateTooHigh) {
        String errorRateDetail = String.format("当前错误率: %.2f%%", errorRate * 100);
        notificationService.sendAlert("错误率告警", errorRateDetail);
    }

    // Response-time check
    final double p99Latency = getP99Latency();
    final boolean latencyTooHigh = p99Latency > 1000;
    if (latencyTooHigh) {
        String latencyDetail = String.format("P99 响应时间: %.0fms", p99Latency);
        notificationService.sendAlert("响应时间告警", latencyDetail);
    }
}
}
告警通知
java
@Component
public class NotificationService {
/**
 * Sends the alert over email, DingTalk and SMS, in that order.
 *
 * <p>Each channel is attempted even if an earlier one fails, so a broken
 * channel cannot suppress the others. The first failure is rethrown after all
 * channels were tried, with later failures attached as suppressed exceptions.
 *
 * @param title   alert title
 * @param message alert body
 */
public void sendAlert(String title, String message) {
    RuntimeException failure = null;

    // Email
    try {
        sendEmail(title, message);
    } catch (RuntimeException e) {
        failure = mergeFailure(failure, e);
    }

    // DingTalk
    try {
        sendDingTalk(title, message);
    } catch (RuntimeException e) {
        failure = mergeFailure(failure, e);
    }

    // SMS
    try {
        sendSms(title, message);
    } catch (RuntimeException e) {
        failure = mergeFailure(failure, e);
    }

    if (failure != null) {
        throw failure;
    }
}

/** Keeps the first failure and attaches every later one to it as suppressed. */
private static RuntimeException mergeFailure(RuntimeException first, RuntimeException next) {
    if (first == null) {
        return next;
    }
    if (first != next) {
        // addSuppressed rejects self-suppression, hence the identity check.
        first.addSuppressed(next);
    }
    return first;
}
}
仪表盘
Grafana 配置
json
{
"dashboard": {
"title": "LCGYL Application Dashboard",
"panels": [
{
"title": "请求量",
"type": "graph",
"targets": [
{
"expr": "rate(http_requests_total[5m])",
"legendFormat": "{{method}} {{uri}}"
}
]
},
{
"title": "响应时间",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))",
"legendFormat": "P99"
}
]
},
{
"title": "错误率",
"type": "singlestat",
"targets": [
{
"expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m]))"
}
]
}
]
}
}
最佳实践
1. 合理命名指标
java
// ✅ 推荐:有意义的命名
meterRegistry.counter("orders.created");
meterRegistry.timer("api.users.find.latency");
// ❌ 不推荐:模糊命名
meterRegistry.counter("counter1");
meterRegistry.timer("timer");
2. 使用标签
java
// ✅ 推荐:使用标签区分
meterRegistry.counter("http.requests",
"method", "GET",
"uri", "/api/users",
"status", "200");
// ❌ 不推荐:创建大量指标
meterRegistry.counter("http.requests.get.api.users.200");
3. 避免高基数标签
java
// ❌ 不推荐:用户 ID 作为标签(高基数)
meterRegistry.counter("requests", "userId", userId);
// ✅ 推荐:使用有限的标签值
meterRegistry.counter("requests", "userType", userType);
4. 设置合理的采样率
properties
# 生产环境
monitoring.tracing.sampler.probability=0.1
# 开发环境
monitoring.tracing.sampler.probability=1.0
常见问题
Q: 监控对性能有影响吗?
A: 有轻微影响,但通常可以忽略。建议:
- 使用异步导出
- 合理设置采样率
- 避免高基数标签
Q: 如何选择监控工具?
A:
- Prometheus + Grafana:开源,功能强大
- InfluxDB + Grafana:时序数据库
- ELK Stack:日志分析
- Jaeger/Zipkin:链路追踪
Q: 指标数据保留多久?
A: 根据需求设置:
- 高精度数据:7 天
- 聚合数据:30 天
- 历史数据:1 年