// Exception monitoring: records exceptions in memory, raises threshold alerts, and computes statistics and reports.
import { DomainEventBus } from '../runtime/DomainEventBus';
/**
 * A single recorded exception together with its handling state.
 */
export interface ExceptionInfo {
  /** Identifier of this record. */
  id: string;
  /** Exception type label (free-form string). */
  type: string;
  /** Human-readable error message. */
  message: string;
  /** Stack trace text, when one was supplied. */
  stack?: string;
  /** Name of the service the exception is attributed to. */
  service: string;
  /** Optional free-form key/value data attached to the record. */
  context?: Record<string, any>;
  /** Severity level of the exception. */
  severity: 'low' | 'medium' | 'high' | 'critical';
  /** Current handling status of the record. */
  status: 'open' | 'processing' | 'resolved' | 'ignored';
  /** When the record was created. */
  createdAt: Date;
  /** When the record was last modified. */
  lastUpdated: Date;
  /** When the record was marked as resolved, if it ever was. */
  resolvedAt?: Date;
  /** Who resolved the record, if provided. */
  resolvedBy?: string;
}

/**
 * Aggregated counters over a set of exception records.
 */
export interface ExceptionStats {
  /** Number of records covered by these statistics. */
  total: number;
  /** Record count per severity level. */
  bySeverity: {
    low: number;
    medium: number;
    high: number;
    critical: number;
  };
  /** Record count per handling status. */
  byStatus: {
    open: number;
    processing: number;
    resolved: number;
    ignored: number;
  };
  /** Record count keyed by service name. */
  byService: Record<string, number>;
  /** Record count keyed by exception type. */
  byType: Record<string, number>;
  /** Average resolution time, in minutes. */
  averageResolutionTime: number;
  /** Number of records created within the last 24 hours. */
  last24Hours: number;
  /** Number of records created within the last 7 days. */
  last7Days: number;
}

/**
 * Process-wide, in-memory registry of reported exceptions.
 *
 * Records live in a Map keyed by record id. Mutations are announced on the
 * DomainEventBus under the event names 'exception.recorded',
 * 'exception.status.updated', 'exception.updated', 'exception.alert' and
 * 'exception.cleanup'. Obtain the shared instance through getInstance().
 */
export class ExceptionMonitor {
  /** Milliseconds in one day; used for all time-window calculations. */
  private static readonly DAY_MS = 24 * 60 * 60 * 1000;

  private static instance: ExceptionMonitor;
  private exceptions: Map<string, ExceptionInfo> = new Map();
  private eventBus: DomainEventBus;
  /**
   * Per-severity alert thresholds: the number of same-type, same-service
   * records within 24 hours at which an 'exception.alert' event is published.
   */
  private severityThresholds: Record<string, number> = {
    low: 10,
    medium: 5,
    high: 2,
    critical: 1
  };

  private constructor() {
    this.eventBus = DomainEventBus.getInstance();
  }

  /** Returns the shared monitor, creating it on first use. */
  static getInstance(): ExceptionMonitor {
    if (!ExceptionMonitor.instance) {
      ExceptionMonitor.instance = new ExceptionMonitor();
    }
    return ExceptionMonitor.instance;
  }

  /**
   * Stores a new exception record with status 'open', publishes
   * 'exception.recorded', and then evaluates the alert threshold.
   *
   * @param exception Record data without the fields this method assigns.
   * @returns The stored record, including its generated id and timestamps.
   */
  async recordException(exception: Omit<ExceptionInfo, 'id' | 'status' | 'createdAt' | 'lastUpdated'>): Promise<ExceptionInfo> {
    // slice(2, 11) yields the same 9 characters the deprecated substr(2, 9) did.
    const id = `exception_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    const now = Date.now();

    const newException: ExceptionInfo = {
      ...exception,
      id,
      status: 'open',
      // Separate Date objects with the same instant, so mutating one cannot affect the other.
      createdAt: new Date(now),
      lastUpdated: new Date(now)
    };

    this.exceptions.set(id, newException);
    this.eventBus.publish('exception.recorded', newException);

    await this.checkAlertThreshold(newException);

    return newException;
  }

  /** Looks up a record by id; returns undefined when no such record exists. */
  getException(exceptionId: string): ExceptionInfo | undefined {
    return this.exceptions.get(exceptionId);
  }

  /**
   * Lists records, newest first, optionally narrowed by filters.
   *
   * Every filter that is provided (and truthy) must match; date bounds are
   * inclusive and compared against createdAt.
   */
  getAllExceptions(filters?: {
    severity?: string;
    status?: string;
    service?: string;
    type?: string;
    startDate?: Date;
    endDate?: Date;
  }): ExceptionInfo[] {
    const { severity, status, service, type, startDate, endDate } = filters ?? {};

    const matches = Array.from(this.exceptions.values()).filter(e =>
      (!severity || e.severity === severity) &&
      (!status || e.status === status) &&
      (!service || e.service === service) &&
      (!type || e.type === type) &&
      (!startDate || e.createdAt >= startDate) &&
      (!endDate || e.createdAt <= endDate)
    );

    // `matches` is a fresh array, so sorting in place does not touch the store.
    return matches.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
  }

  /**
   * Changes the status of a record and publishes 'exception.status.updated'.
   *
   * When the new status is 'resolved', resolvedAt is set to now and resolvedBy
   * to the given value; for any other status the previous values are kept.
   *
   * @returns The updated record, or null when the id is unknown.
   */
  async updateExceptionStatus(exceptionId: string, status: 'open' | 'processing' | 'resolved' | 'ignored', resolvedBy?: string): Promise<ExceptionInfo | null> {
    const exception = this.exceptions.get(exceptionId);
    if (!exception) {
      return null;
    }

    const now = Date.now();
    const isResolved = status === 'resolved';

    const updatedException: ExceptionInfo = {
      ...exception,
      status,
      lastUpdated: new Date(now),
      resolvedAt: isResolved ? new Date(now) : exception.resolvedAt,
      resolvedBy: isResolved ? resolvedBy : exception.resolvedBy
    };

    this.exceptions.set(exceptionId, updatedException);
    this.eventBus.publish('exception.status.updated', updatedException);
    return updatedException;
  }

  /**
   * Merges arbitrary field updates into a record and publishes
   * 'exception.updated'. lastUpdated is always set to now.
   *
   * The record id cannot be changed through `updates`: it is the key the
   * record is stored under, and letting it drift would make the stored object
   * disagree with its own lookup key.
   *
   * @returns The updated record, or null when the id is unknown.
   */
  async updateException(exceptionId: string, updates: Partial<ExceptionInfo>): Promise<ExceptionInfo | null> {
    const exception = this.exceptions.get(exceptionId);
    if (!exception) {
      return null;
    }

    const updatedException: ExceptionInfo = {
      ...exception,
      ...updates,
      id: exception.id,
      lastUpdated: new Date()
    };

    this.exceptions.set(exceptionId, updatedException);
    this.eventBus.publish('exception.updated', updatedException);
    return updatedException;
  }

  /**
   * Publishes 'exception.alert' when the number of records created in the
   * last 24 hours with the same type and service as `exception` reaches the
   * threshold configured for its severity. A missing or zero threshold
   * disables the check for that severity.
   */
  private async checkAlertThreshold(exception: ExceptionInfo): Promise<void> {
    const threshold = this.severityThresholds[exception.severity];
    if (!threshold) return;

    const windowStart = Date.now() - ExceptionMonitor.DAY_MS;

    let count = 0;
    for (const candidate of this.exceptions.values()) {
      if (
        candidate.type === exception.type &&
        candidate.service === exception.service &&
        candidate.createdAt.getTime() >= windowStart
      ) {
        count++;
      }
    }

    if (count >= threshold) {
      this.eventBus.publish('exception.alert', {
        exception,
        count,
        threshold,
        timestamp: new Date()
      });
    }
  }

  /** Computes statistics over every stored record. */
  getExceptionStats(): ExceptionStats {
    return this.computeStats(Array.from(this.exceptions.values()));
  }

  /**
   * Computes statistics over the stored records of one service.
   *
   * byService always contains exactly one entry for `service`, even when the
   * service has no records (count 0).
   */
  getServiceExceptionStats(service: string): ExceptionStats {
    const serviceExceptions = Array.from(this.exceptions.values()).filter(e => e.service === service);

    return {
      ...this.computeStats(serviceExceptions),
      byService: { [service]: serviceExceptions.length }
    };
  }

  /**
   * Deletes resolved records created more than `days` days ago and publishes
   * 'exception.cleanup' when at least one record was removed.
   *
   * @param days Minimum age, in days, of resolved records to delete.
   * @returns The number of records removed.
   */
  async cleanupOldExceptions(days: number = 30): Promise<number> {
    const cutoffDate = new Date(Date.now() - days * ExceptionMonitor.DAY_MS);

    let removedCount = 0;

    // Deleting the current entry while iterating a Map is well-defined in JavaScript.
    for (const [id, exception] of this.exceptions.entries()) {
      if (exception.status === 'resolved' && exception.createdAt < cutoffDate) {
        this.exceptions.delete(id);
        removedCount++;
      }
    }

    if (removedCount > 0) {
      this.eventBus.publish('exception.cleanup', {
        removedCount,
        cutoffDate,
        timestamp: new Date()
      });
    }

    return removedCount;
  }

  /**
   * Builds a report over the records created within [startDate, endDate]
   * (both inclusive).
   *
   * - summary: statistics over the in-range records.
   * - topExceptions: up to 20 in-range records whose type is among the 10 most
   *   frequent types in the range, in storage order.
   * - serviceStats: statistics per service, over the in-range records only.
   */
  async generateExceptionReport(startDate: Date, endDate: Date): Promise<{
    summary: ExceptionStats;
    topExceptions: ExceptionInfo[];
    serviceStats: Record<string, ExceptionStats>;
  }> {
    const inRange = Array.from(this.exceptions.values()).filter(
      e => e.createdAt >= startDate && e.createdAt <= endDate
    );

    // Count occurrences per type, then keep the 10 most frequent types.
    const typeFrequency = new Map<string, number>();
    for (const exception of inRange) {
      typeFrequency.set(exception.type, (typeFrequency.get(exception.type) ?? 0) + 1);
    }

    const topTypes = new Set(
      Array.from(typeFrequency.entries())
        .sort(([, a], [, b]) => b - a)
        .slice(0, 10)
        .map(([type]) => type)
    );

    const topExceptions = inRange
      .filter(e => topTypes.has(e.type))
      .slice(0, 20);

    // Group the in-range records by service so each service's statistics
    // describe the requested period rather than the whole store.
    const recordsByService = new Map<string, ExceptionInfo[]>();
    for (const exception of inRange) {
      const bucket = recordsByService.get(exception.service);
      if (bucket) {
        bucket.push(exception);
      } else {
        recordsByService.set(exception.service, [exception]);
      }
    }

    const serviceStats: Record<string, ExceptionStats> = {};
    for (const [service, records] of recordsByService) {
      serviceStats[service] = this.computeStats(records);
    }

    return {
      summary: this.computeStats(inRange),
      topExceptions,
      serviceStats
    };
  }

  /** Merges the given thresholds into the current ones; unspecified severities keep their value. */
  setSeverityThresholds(thresholds: Record<string, number>): void {
    this.severityThresholds = {
      ...this.severityThresholds,
      ...thresholds
    };
  }

  /** Returns a copy of the current thresholds; mutating it does not affect the monitor. */
  getSeverityThresholds(): Record<string, number> {
    return { ...this.severityThresholds };
  }

  /**
   * Aggregates counters over the given records.
   *
   * averageResolutionTime is in minutes and only considers records whose
   * status is 'resolved' and that carry a resolvedAt; it is 0 when there are
   * none. The last24Hours / last7Days windows are measured in elapsed time
   * back from now.
   */
  private computeStats(exceptions: ExceptionInfo[]): ExceptionStats {
    const bySeverity = { low: 0, medium: 0, high: 0, critical: 0 };
    const byStatus = { open: 0, processing: 0, resolved: 0, ignored: 0 };
    const byService: Record<string, number> = {};
    const byType: Record<string, number> = {};

    const now = Date.now();
    const dayAgo = now - ExceptionMonitor.DAY_MS;
    const weekAgo = now - 7 * ExceptionMonitor.DAY_MS;

    let totalResolutionMinutes = 0;
    let resolvedCount = 0;
    let createdLast24Hours = 0;
    let createdLast7Days = 0;

    for (const exception of exceptions) {
      bySeverity[exception.severity]++;
      byStatus[exception.status]++;

      byService[exception.service] = (byService[exception.service] ?? 0) + 1;
      byType[exception.type] = (byType[exception.type] ?? 0) + 1;

      const createdMs = exception.createdAt.getTime();

      if (exception.status === 'resolved' && exception.resolvedAt) {
        totalResolutionMinutes += (exception.resolvedAt.getTime() - createdMs) / (1000 * 60);
        resolvedCount++;
      }

      if (createdMs >= dayAgo) {
        createdLast24Hours++;
      }
      if (createdMs >= weekAgo) {
        createdLast7Days++;
      }
    }

    return {
      total: exceptions.length,
      bySeverity,
      byStatus,
      byService,
      byType,
      averageResolutionTime: resolvedCount > 0 ? totalResolutionMinutes / resolvedCount : 0,
      last24Hours: createdLast24Hours,
      last7Days: createdLast7Days
    };
  }
}