Skip to content

监控指标

LCGYL Framework 提供全面的监控支持,帮助你了解应用的运行状态和性能表现。

监控配置

基本配置

properties
# application.properties
monitoring.enabled=true
monitoring.metrics.enabled=true
monitoring.health.enabled=true
monitoring.tracing.enabled=true

指标导出

properties
# Prometheus
monitoring.export.prometheus.enabled=true
monitoring.export.prometheus.endpoint=/metrics

# InfluxDB
monitoring.export.influxdb.enabled=true
monitoring.export.influxdb.uri=http://localhost:8086
monitoring.export.influxdb.db=metrics

健康检查

内置健康检查

java
/**
 * Aggregates the results of all injected {@link HealthIndicator}s into a single report.
 */
@Component
public class HealthEndpoint {

    @Inject
    private List<HealthIndicator> healthIndicators;

    /**
     * Handles {@code GET /health}.
     *
     * @return a report whose status is UP only when every indicator reports UP; each
     *         indicator's own result is stored in the details under the indicator's name
     */
    @GetMapping("/health")
    public Health health() {
        boolean allUp = true;
        Map<String, Health> resultsByName = new HashMap<>();

        for (HealthIndicator indicator : healthIndicators) {
            Health result = indicator.health();
            resultsByName.put(indicator.getName(), result);

            // A single non-UP indicator is enough to mark the whole application DOWN.
            boolean indicatorUp = result.getStatus() == Status.UP;
            allUp = allUp && indicatorUp;
        }

        Status overall = allUp ? Status.UP : Status.DOWN;
        return Health.builder()
            .status(overall)
            .details(resultsByName)
            .build();
    }
}

自定义健康检查

java
/**
 * Health indicator that borrows a connection from the injected {@link DataSource}
 * and asks the driver whether it is still valid.
 */
@Component
public class DatabaseHealthIndicator implements HealthIndicator {

    /** Timeout, in seconds, passed to {@link Connection#isValid(int)}. */
    private static final int VALIDATION_TIMEOUT_SECONDS = 5;

    @Inject
    private DataSource dataSource;

    /** @return the key under which this indicator is reported: {@code "database"} */
    @Override
    public String getName() {
        return "database";
    }

    /**
     * Probes the database.
     *
     * @return UP with descriptive details when the connection validates; DOWN without
     *         details when it does not; DOWN with an {@code error} detail when a
     *         {@link SQLException} is raised
     */
    @Override
    public Health health() {
        try (Connection connection = dataSource.getConnection()) {
            if (!connection.isValid(VALIDATION_TIMEOUT_SECONDS)) {
                return Health.down().build();
            }
            return Health.up()
                .withDetail("database", "MySQL")
                .withDetail("status", "connected")
                .build();
        } catch (SQLException failure) {
            return Health.down()
                .withDetail("error", failure.getMessage())
                .build();
        }
    }
}

/**
 * Health indicator that sends a PING through a connection obtained from the injected
 * {@code RedisTemplate}'s connection factory.
 */
@Component
public class RedisHealthIndicator implements HealthIndicator {

    @Inject
    private RedisTemplate<String, Object> redisTemplate;

    /** @return the key under which this indicator is reported: {@code "redis"} */
    @Override
    public String getName() {
        return "redis";
    }

    /**
     * Probes Redis.
     *
     * @return UP when the server answers {@code "PONG"}; DOWN without details for any
     *         other reply; DOWN with an {@code error} detail when any exception is raised
     */
    @Override
    public Health health() {
        // The connection is obtained directly from the factory, so it must be released
        // here; otherwise every health probe leaves one connection open.
        // NOTE(review): assumes the connection type is RedisConnection and implements
        // AutoCloseable (as in Spring Data Redis) - confirm against the framework.
        try (RedisConnection connection = redisTemplate.getConnectionFactory().getConnection()) {
            String reply = connection.ping();
            if ("PONG".equals(reply)) {
                return Health.up()
                    .withDetail("status", "connected")
                    .build();
            }
        } catch (Exception e) {
            return Health.down()
                .withDetail("error", e.getMessage())
                .build();
        }
        return Health.down().build();
    }
}

健康检查响应

json
{
  "status": "UP",
  "details": {
    "database": {
      "status": "UP",
      "details": {
        "database": "MySQL",
        "status": "connected"
      }
    },
    "redis": {
      "status": "UP",
      "details": {
        "status": "connected"
      }
    },
    "diskSpace": {
      "status": "UP",
      "details": {
        "total": 500000000000,
        "free": 200000000000,
        "threshold": 10000000000
      }
    }
  }
}

指标收集

计数器

java
/**
 * Order-related counters backed by the injected {@code MeterRegistry}.
 */
@Component
public class OrderMetrics {

    @Inject
    private MeterRegistry meterRegistry;

    private Counter createdOrders;
    private Counter failedOrders;

    /** Looks up the two fixed counters once, after injection has completed. */
    @PostConstruct
    public void init() {
        this.createdOrders = meterRegistry.counter("orders.created");
        this.failedOrders = meterRegistry.counter("orders.failed");
    }

    /** Adds one to {@code orders.created}. */
    public void recordOrderCreated() {
        createdOrders.increment();
    }

    /** Adds one to {@code orders.failed}. */
    public void recordOrderFailed() {
        failedOrders.increment();
    }

    /**
     * Adds one to the tagged {@code orders} counter.
     *
     * @param status value of the {@code status} tag
     * @param type   value of the {@code type} tag
     */
    public void recordOrder(String status, String type) {
        Counter tagged = meterRegistry.counter("orders", "status", status, "type", type);
        tagged.increment();
    }
}

计时器

java
/**
 * Records API latency in the {@code api.latency} timer, tagged by endpoint.
 */
@Component
public class PerformanceMetrics {

    @Inject
    private MeterRegistry meterRegistry;

    /**
     * Records an already-measured duration.
     *
     * @param endpoint   value of the {@code endpoint} tag
     * @param durationMs elapsed time in milliseconds
     */
    public void recordApiLatency(String endpoint, long durationMs) {
        Timer latency = latencyTimer(endpoint);
        latency.record(durationMs, TimeUnit.MILLISECONDS);
    }

    /**
     * Starts a timing sample against the injected registry.
     *
     * @return the sample to hand back to {@link #stopTimer(Timer.Sample, String)}
     */
    public Timer.Sample startTimer() {
        return Timer.start(meterRegistry);
    }

    /**
     * Stops a sample and records it against the endpoint's latency timer.
     *
     * @param sample   sample previously returned by {@link #startTimer()}
     * @param endpoint value of the {@code endpoint} tag
     */
    public void stopTimer(Timer.Sample sample, String endpoint) {
        Timer latency = latencyTimer(endpoint);
        sample.stop(latency);
    }

    /** Looks up the {@code api.latency} timer for the given endpoint tag. */
    private Timer latencyTimer(String endpoint) {
        return meterRegistry.timer("api.latency", "endpoint", endpoint);
    }
}

仪表(Gauge)

java
/**
 * Registers gauges for connection-pool usage and JVM memory usage.
 */
@Component
public class SystemMetrics {

    @Inject
    private MeterRegistry meterRegistry;

    @Inject
    private ConnectionPool connectionPool;

    /** Registers all gauges once, after injection has completed. */
    @PostConstruct
    public void init() {
        // Connection-pool gauges: each one reads a getter on the injected pool.
        meterRegistry.gauge("connection.pool.active",
            connectionPool, pool -> pool.getActiveConnections());

        meterRegistry.gauge("connection.pool.idle",
            connectionPool, pool -> pool.getIdleConnections());

        // Used memory is derived as total minus free, as reported by the Runtime.
        meterRegistry.gauge("jvm.memory.used",
            Runtime.getRuntime(),
            runtime -> runtime.totalMemory() - runtime.freeMemory());
    }
}

分布摘要

java
/**
 * Tracks the distribution of request sizes in the {@code request.size} summary.
 */
@Component
public class RequestMetrics {

    @Inject
    private MeterRegistry meterRegistry;

    private DistributionSummary requestSizes;

    /**
     * Builds and registers the summary with a base unit of bytes and the
     * 0.5 / 0.95 / 0.99 percentiles published.
     */
    @PostConstruct
    public void init() {
        this.requestSizes = DistributionSummary
            .builder("request.size")
            .baseUnit("bytes")
            .publishPercentiles(0.5, 0.95, 0.99)
            .register(meterRegistry);
    }

    /**
     * Records one observed request size.
     *
     * @param bytes size of the request in bytes
     */
    public void recordRequestSize(long bytes) {
        requestSizes.record(bytes);
    }
}

注解方式

@Timed

java
/**
 * Example service whose methods are timed declaratively with {@code @Timed}.
 */
@Component
public class UserService {

    /**
     * Looks a user up through the repository; timed as {@code user.find}.
     *
     * @param id identifier passed to the repository
     * @return whatever the repository returns for that identifier
     */
    @Timed(value = "user.find", description = "查找用户耗时")
    public User findById(Long id) {
        User found = userRepository.findById(id);
        return found;
    }

    /**
     * Saves a user through the repository; timed as {@code user.create} with the
     * 0.5 / 0.95 / 0.99 percentiles requested.
     *
     * @param user the user to save
     * @return the value returned by the repository's save call
     */
    @Timed(value = "user.create", percentiles = {0.5, 0.95, 0.99})
    public User create(User user) {
        User saved = userRepository.save(user);
        return saved;
    }
}

@Counted

java
/**
 * Example service whose method invocations are counted declaratively with {@code @Counted}.
 */
@Component
public class OrderService {

    /**
     * Saves an order through the repository; counted as {@code order.created}.
     *
     * @param order the order to save
     * @return the value returned by the repository's save call
     */
    @Counted(value = "order.created", description = "创建订单次数")
    public Order createOrder(Order order) {
        Order saved = orderRepository.save(order);
        return saved;
    }
}

链路追踪

配置追踪

properties
monitoring.tracing.enabled=true
monitoring.tracing.sampler.probability=1.0
monitoring.tracing.exporter=zipkin
monitoring.tracing.zipkin.endpoint=http://localhost:9411/api/v2/spans

手动追踪

java
@Component
public class OrderService {
    
    @Inject
    private Tracer tracer;
    
    public Order createOrder(Order order) {
        Span span = tracer.spanBuilder("createOrder")
            .setAttribute("orderId", order.getId())
            .setAttribute("userId", order.getUserId())
            .startSpan();
        
        try (Scope scope = span.makeCurrent()) {
            // 业务逻辑
            Order result = orderRepository.save(order);
            span.setAttribute("status", "success");
            return result;
        } catch (Exception e) {
            span.setStatus(StatusCode.ERROR, e.getMessage());
            span.recordException(e);
            throw e;
        } finally {
            span.end();
        }
    }
}

传播上下文

java
/**
 * Outgoing-request interceptor that copies the current span's identifiers into
 * the {@code X-Trace-Id} and {@code X-Span-Id} request headers.
 */
@Component
public class HttpClientInterceptor implements ClientHttpRequestInterceptor {

    @Inject
    private Tracer tracer;

    /**
     * Adds the tracing headers when a span is current, then continues the request.
     *
     * @param request   the outgoing request whose headers may be extended
     * @param body      the request body, passed through untouched
     * @param execution the remainder of the interceptor chain
     * @return the response produced by the rest of the chain
     * @throws IOException propagated from the rest of the chain
     */
    @Override
    public ClientHttpResponse intercept(HttpRequest request, byte[] body,
            ClientHttpRequestExecution execution) throws IOException {
        addTraceHeaders(request);
        return execution.execute(request, body);
    }

    /** Adds the trace and span identifiers to the request; does nothing without a current span. */
    private void addTraceHeaders(HttpRequest request) {
        Span active = tracer.getCurrentSpan();
        if (active == null) {
            return;
        }
        request.getHeaders().add("X-Trace-Id", active.getTraceId());
        request.getHeaders().add("X-Span-Id", active.getSpanId());
    }
}

日志集成

结构化日志

java
@Component
public class OrderService {
    
    private static final Logger logger = LoggerFactory.getLogger(OrderService.class);
    
    public Order createOrder(Order order) {
        logger.info("创建订单", 
            kv("orderId", order.getId()),
            kv("userId", order.getUserId()),
            kv("amount", order.getTotalAmount()));
        
        try {
            Order result = orderRepository.save(order);
            logger.info("订单创建成功", 
                kv("orderId", result.getId()),
                kv("duration", duration));
            return result;
        } catch (Exception e) {
            logger.error("订单创建失败", 
                kv("orderId", order.getId()),
                kv("error", e.getMessage()));
            throw e;
        }
    }
}

MDC 上下文

java
/**
 * Servlet filter that puts a per-request identifier and the current user identifier
 * into the logging MDC for the duration of the request.
 */
@Component
public class RequestFilter implements Filter {

    /**
     * Populates the MDC, runs the rest of the chain, and always clears the MDC afterwards.
     *
     * @param request  the incoming request, passed through untouched
     * @param response the outgoing response, passed through untouched
     * @param chain    the remainder of the filter chain
     * @throws IOException      propagated from the rest of the chain
     * @throws ServletException propagated from the rest of the chain
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response,
            FilterChain chain) throws IOException, ServletException {

        String requestId = UUID.randomUUID().toString();

        // The MDC is populated inside the try block so that the finally clause also
        // cleans up when resolving the user identifier throws after requestId was set.
        try {
            MDC.put("requestId", requestId);
            MDC.put("userId", getCurrentUserId());
            chain.doFilter(request, response);
        } finally {
            MDC.clear();
        }
    }
}

告警

告警规则

java
/**
 * Periodically compares the error rate and P99 latency against fixed thresholds
 * and sends a notification for each threshold that is exceeded.
 */
@Component
public class AlertService {

    /** Error rate (as a fraction) above which an alert is sent. */
    private static final double MAX_ERROR_RATE = 0.05;

    /** P99 latency above which an alert is sent; the alert message reports it in ms. */
    private static final double MAX_P99_LATENCY = 1000;

    @Inject
    private MeterRegistry meterRegistry;

    @Inject
    private NotificationService notificationService;

    /** Runs both checks, error rate first, at a fixed rate of 60000 ms. */
    @Scheduled(fixedRate = 60000)
    public void checkAlerts() {
        alertOnErrorRate();
        alertOnLatency();
    }

    /** Sends the error-rate alert when the current rate is above the threshold. */
    private void alertOnErrorRate() {
        double errorRate = getErrorRate();
        if (errorRate > MAX_ERROR_RATE) {
            String body = String.format("当前错误率: %.2f%%", errorRate * 100);
            notificationService.sendAlert("错误率告警", body);
        }
    }

    /** Sends the response-time alert when the current P99 latency is above the threshold. */
    private void alertOnLatency() {
        double p99Latency = getP99Latency();
        if (p99Latency > MAX_P99_LATENCY) {
            String body = String.format("P99 响应时间: %.0fms", p99Latency);
            notificationService.sendAlert("响应时间告警", body);
        }
    }
}

告警通知

java
/**
 * Fans an alert out to every notification channel: e-mail, DingTalk and SMS.
 */
@Component
public class NotificationService {

    /**
     * Sends the alert through all channels, in the order e-mail, DingTalk, SMS.
     *
     * <p>Every channel is attempted even when an earlier one fails, so that a single
     * broken channel cannot suppress the alert on the others. If any channel fails,
     * the first failure is rethrown after all channels have been tried, with later
     * failures attached as suppressed exceptions.
     *
     * @param title   alert title
     * @param message alert body
     * @throws RuntimeException the first failure raised by any channel
     */
    public void sendAlert(String title, String message) {
        Runnable[] channels = {
            () -> sendEmail(title, message),
            () -> sendDingTalk(title, message),
            () -> sendSms(title, message)
        };

        RuntimeException firstFailure = null;
        for (Runnable channel : channels) {
            try {
                channel.run();
            } catch (RuntimeException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    firstFailure.addSuppressed(e);
                }
            }
        }

        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}

仪表盘

Grafana 配置

json
{
  "dashboard": {
    "title": "LCGYL Application Dashboard",
    "panels": [
      {
        "title": "请求量",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(http_requests_total[5m])",
            "legendFormat": "{{method}} {{uri}}"
          }
        ]
      },
      {
        "title": "响应时间",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))",
            "legendFormat": "P99"
          }
        ]
      },
      {
        "title": "错误率",
        "type": "singlestat",
        "targets": [
          {
            "expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m]))"
          }
        ]
      }
    ]
  }
}

最佳实践

1. 合理命名指标

java
// ✅ Recommended: meaningful names
meterRegistry.counter("orders.created");
meterRegistry.timer("api.users.find.latency");

// ❌ Not recommended: vague names
meterRegistry.counter("counter1");
meterRegistry.timer("timer");

2. 使用标签

java
// ✅ Recommended: one metric, distinguished by tags
meterRegistry.counter("http.requests", 
    "method", "GET", 
    "uri", "/api/users",
    "status", "200");

// ❌ Not recommended: creating a large number of separate metrics
meterRegistry.counter("http.requests.get.api.users.200");

3. 避免高基数标签

java
// ❌ Not recommended: user ID as a tag (high cardinality)
meterRegistry.counter("requests", "userId", userId);

// ✅ Recommended: tags with a limited set of values
meterRegistry.counter("requests", "userType", userType);

4. 设置合理的采样率

properties
# Production environment
monitoring.tracing.sampler.probability=0.1

# Development environment
monitoring.tracing.sampler.probability=1.0

常见问题

Q: 监控对性能有影响吗?

A: 有轻微影响,但通常可以忽略。建议:

  • 使用异步导出
  • 合理设置采样率
  • 避免高基数标签

Q: 如何选择监控工具?

A: 根据监控目标选择:

  • Prometheus + Grafana:开源,功能强大
  • InfluxDB + Grafana:时序数据库
  • ELK Stack:日志分析
  • Jaeger/Zipkin:链路追踪

Q: 指标数据保留多久?

A: 根据需求设置:

  • 高精度数据:7 天
  • 聚合数据:30 天
  • 历史数据:1 年

下一步

Released under the Apache License 2.0