- 新增文档模板和导航结构 - 实现服务器基础API路由和控制器 - 添加扩展插件配置和前端框架 - 引入多租户和权限管理模块 - 集成日志和数据库配置 - 添加核心业务模型和类型定义
107 lines
3.2 KiB
TypeScript
107 lines
3.2 KiB
TypeScript
import { AIService } from './AIService';
|
|
import { logger } from '../utils/logger';
|
|
import db from '../config/database';
|
|
|
|
/**
 * Outcome of a single selector repair attempt.
 */
export interface SelectorRepairResult {
  /** `true` when a replacement selector was obtained and recorded. */
  success: boolean;
  /** The replacement selector; only present when `success` is `true`. */
  newSelector?: string;
  /** Confidence reported for the replacement; `0` when the repair failed. */
  confidence: number;
}
|
|
|
|
/**
 * [CORE_AI_09] Self-Healing Crawler Selectors
 * @description When a crawler selector stops working, asks the AI service to
 * re-locate the target element and stores the suggestion for later verification.
 */
export class SelfHealingService {
  private static readonly TABLE_NAME = 'cf_selector_repairs';

  /**
   * Repair a selector that no longer matches.
   *
   * Asks `AIService.suggestNewSelector` for a replacement and, when one is
   * returned, stores it with status `pending_verification`.
   *
   * @param params.platform Platform the selector belongs to.
   * @param params.targetField Field the selector is supposed to extract.
   * @param params.oldSelector The selector that stopped working.
   * @param params.htmlContext HTML handed to the AI service as context.
   * @returns The repair outcome. Failures (including AI or database errors)
   * are logged and reported as `{ success: false, confidence: 0 }`; this
   * method does not reject.
   */
  static async repairSelector(params: {
    platform: string;
    targetField: string;
    oldSelector: string;
    htmlContext: string;
  }): Promise<SelectorRepairResult> {
    logger.warn(`[SelfHealing] Repairing selector for ${params.platform}.${params.targetField}: ${params.oldSelector}`);

    try {
      // 1. Ask the AI service for a replacement selector.
      const repairResult = await AIService.suggestNewSelector({
        field: params.targetField,
        oldSelector: params.oldSelector,
        html: params.htmlContext
      });

      if (!repairResult || !repairResult.newSelector) {
        return { success: false, confidence: 0 };
      }

      // The result type promises a number, so never forward undefined/NaN.
      const confidence = SelfHealingService.normalizeConfidence(repairResult.confidence);

      // 2. Persist the suggestion. A failed insert is treated as a failed
      // repair (handled by the catch below).
      await SelfHealingService.recordRepair({
        platform: params.platform,
        field: params.targetField,
        old_selector: params.oldSelector,
        new_selector: repairResult.newSelector,
        confidence,
        status: 'pending_verification'
      });

      return {
        success: true,
        newSelector: repairResult.newSelector,
        confidence
      };
    } catch (err: unknown) {
      logger.error(`[SelfHealing] Repair failed: ${SelfHealingService.describeError(err)}`);
      return { success: false, confidence: 0 };
    }
  }

  /**
   * Insert one repair row, stamping `created_at` and `updated_at` with the
   * same instant.
   *
   * NOTE(review): `old_selector` / `new_selector` are created as 255-character
   * columns by `initTable`; longer selectors may be rejected or truncated
   * depending on the database - confirm.
   */
  private static async recordRepair(data: {
    platform: string;
    field: string;
    old_selector: string;
    new_selector: string;
    confidence: number;
    status: string;
  }): Promise<void> {
    const now = new Date();
    await db(SelfHealingService.TABLE_NAME).insert({
      ...data,
      created_at: now,
      updated_at: now
    });
  }

  /**
   * Look up the most recently created repair with status `verified`.
   *
   * @param platform Platform the selector belongs to.
   * @param field Field the selector extracts.
   * @returns The stored replacement selector, or `null` when none exists.
   */
  static async getHealedSelector(platform: string, field: string): Promise<string | null> {
    const record = await db(SelfHealingService.TABLE_NAME)
      .where({ platform, field, status: 'verified' })
      .orderBy('created_at', 'desc')
      .first();

    return record?.new_selector ?? null;
  }

  /**
   * Create the repairs table when it does not exist yet; otherwise do nothing.
   */
  static async initTable(): Promise<void> {
    const tableName = SelfHealingService.TABLE_NAME;

    const exists = await db.schema.hasTable(tableName);
    if (exists) {
      return;
    }

    logger.info(`📦 Creating ${tableName} table...`);
    await db.schema.createTable(tableName, (table) => {
      table.increments('id').primary();
      table.string('platform', 32).notNullable();
      table.string('field', 64).notNullable();
      table.string('old_selector', 255).notNullable();
      table.string('new_selector', 255).notNullable();
      table.float('confidence').defaultTo(0);
      table.string('status', 32).defaultTo('pending_verification'); // pending_verification, verified, failed
      table.timestamps(true, true);

      table.index(['platform', 'field']);
    });
    logger.info(`✅ Table ${tableName} created`);
  }

  /** Turn any thrown value into a loggable message without throwing itself. */
  private static describeError(err: unknown): string {
    if (err instanceof Error) {
      return err.message;
    }
    if (typeof err === 'object' && err !== null && 'message' in err) {
      return String((err as { message: unknown }).message);
    }
    return String(err);
  }

  /** Coerce a reported confidence to a finite number, falling back to 0. */
  private static normalizeConfidence(value: unknown): number {
    const numeric = typeof value === 'string' ? Number.parseFloat(value) : value;
    return typeof numeric === 'number' && Number.isFinite(numeric) ? numeric : 0;
  }
}
|