删除不再需要的归档文件和过时的文档模板,包括多个README、安全策略、前端集成蓝图等文件,同时移除了未使用的业务文档和项目结构文件。 优化项目结构,移除冗余文件,保持代码库整洁。主要删除archive/handover目录下的多个文件及doc目录下的部分文档模板。
476 lines
11 KiB
Markdown
476 lines
11 KiB
Markdown
# DOM Interaction (Crawlful Hub Plugin)

> **定位**:Crawlful Hub 插件 DOM 交互文档 - 描述如何与电商平台页面进行 DOM 交互。
> **更新日期**: 2026-03-18

---

## 1. DOM 选择器策略

### 1.1 选择器类型

| 类型 | 示例 | 适用场景 |
|------|------|----------|
| ID | `#productTitle` | 唯一元素 |
| Class | `.a-price-whole` | 样式类 |
| Attribute | `[data-asin]` | 数据属性 |
| XPath | `//span[@id='price']` | 复杂结构 |
| CSS Selector | `div.product > h1` | 层级关系 |

### 1.2 平台选择器映射

#### Amazon

```typescript
/**
 * CSS selectors for Amazon pages, grouped into `product`, `search` and
 * `seller` sections. A value containing commas is a selector list, so any of
 * the listed alternatives may match.
 */
const amazonSelectors = {
  product: {
    title: '#productTitle',
    price: '.a-price-whole, .a-price .a-offscreen',
    listPrice: '.a-text-price .a-offscreen',
    images: '#landingImage, #imgTagWrapperId img',
    description: '#feature-bullets ul, #productDescription',
    brand: '#bylineInfo',
    asin: '[data-asin]',
    rating: '#acrPopover .a-icon-alt',
    reviewCount: '#acrCustomerReviewText',
    availability: '#availability span',
    category: '#wayfinding-breadcrumbs_container ul',
  },
  search: {
    results: '[data-component-type="s-search-result"]',
    title: 'h2 a span',
    price: '.a-price-whole',
    image: '.s-image',
    rating: '.a-icon-alt',
  },
  seller: {
    name: '#merchant-info a:first-child',
    rating: '#merchant-info .a-icon-alt',
  },
};
```

#### eBay

```typescript
/**
 * CSS selectors for eBay pages, grouped into `product` and `search`
 * sections. A value containing commas is a selector list, so any of the
 * listed alternatives may match.
 */
const ebaySelectors = {
  product: {
    title: 'h1[data-testid="x-item-title-label"]',
    price: '.notranslate.vi-price .notranslate',
    images: '#icImg, .vi-image-gallery__image',
    description: '#desc_wrapper, #ds_div',
    condition: '.u-flL.condText',
    seller: '.mbg-nw',
    sellerRating: '.mbg-l .mbg-fb',
    quantity: '#qtyTextBox',
    shipping: '#fshippingCost span',
  },
  search: {
    results: '.s-item',
    title: '.s-item__title',
    price: '.s-item__price',
    image: '.s-item__image img',
  },
};
```

#### Shopify

```typescript
/**
 * CSS selectors for Shopify pages, grouped into `product` (storefront) and
 * `admin` sections. A value containing commas is a selector list, so any of
 * the listed alternatives may match.
 */
const shopifySelectors = {
  product: {
    title: 'h1.product-title, h1[data-product-title]',
    price: '.product-price, [data-product-price]',
    comparePrice: '.compare-price, [data-compare-price]',
    images: '.product-image, .product__media img',
    description: '.product-description, [data-product-description]',
    variants: '[data-variant-id]',
    inventory: '[data-inventory]',
    sku: '[data-sku]',
    barcode: '[data-barcode]',
  },
  admin: {
    // `products` and `orders` intentionally share the same row selector.
    products: 'table tbody tr',
    orders: 'table tbody tr',
    title: 'td:first-child a',
    status: 'td:nth-child(3) span',
  },
};
```

---

## 2. DOM 操作工具

### 2.1 元素提取工具

```typescript
// src/content/utils/domUtils.ts

/**
 * Static helpers for reading text, attributes and image URLs out of the DOM
 * and for waiting until an element shows up.
 */
export class DOMUtils {
  /**
   * Returns the trimmed text content of the first element matching `selector`.
   *
   * @param selector - CSS selector handed to `querySelector`.
   * @param context - Root to search in; defaults to the global `document`.
   * @returns The trimmed text, or `''` when nothing matches or the text is empty.
   */
  static getText(selector: string, context: Document | Element = document): string {
    const element = context.querySelector(selector);
    return element?.textContent?.trim() || '';
  }

  /**
   * Returns the value of attribute `attr` on the first element matching `selector`.
   *
   * @param selector - CSS selector handed to `querySelector`.
   * @param attr - Attribute name to read.
   * @param context - Root to search in; defaults to the global `document`.
   * @returns The attribute value, or `''` when the element or attribute is missing.
   */
  static getAttr(selector: string, attr: string, context: Document | Element = document): string {
    const element = context.querySelector(selector);
    return element?.getAttribute(attr) || '';
  }

  /**
   * Returns the trimmed text of every element matching `selector`, in the
   * order `querySelectorAll` yields them. Elements without text contribute `''`.
   *
   * @param selector - CSS selector handed to `querySelectorAll`.
   * @param context - Root to search in; defaults to the global `document`.
   */
  static getTextList(selector: string, context: Document | Element = document): string[] {
    const elements = context.querySelectorAll(selector);
    return Array.from(elements).map((el) => el.textContent?.trim() || '');
  }

  /**
   * Collects absolute image URLs from every element matching `selector`.
   * For each element `src` is preferred and `data-src` is the fallback;
   * elements with neither attribute are dropped from the result.
   *
   * @param selector - CSS selector handed to `querySelectorAll`.
   * @param context - Root to search in; defaults to the global `document`.
   */
  static getImageUrls(selector: string, context: Document | Element = document): string[] {
    const images = context.querySelectorAll(selector);
    return Array.from(images)
      .map((img) => {
        const src = img.getAttribute('src') || img.getAttribute('data-src');
        // Reference the class by name so the method keeps working when it is
        // detached from the class (e.g. passed around as a callback).
        return src ? DOMUtils.resolveUrl(src) : '';
      })
      .filter(Boolean);
  }

  /**
   * Resolves with the first element matching `selector` inside `context`,
   * waiting for DOM mutations if it is not present yet.
   *
   * @param selector - CSS selector handed to `querySelector`.
   * @param timeout - Maximum wait in milliseconds (default 10000).
   * @param context - Root to search in and to observe; defaults to `document`.
   * @returns The matching element, or `null` once `timeout` elapses.
   */
  static waitForElement(
    selector: string,
    timeout: number = 10000,
    context: Document | Element = document
  ): Promise<Element | null> {
    return new Promise((resolve) => {
      const existing = context.querySelector(selector);
      if (existing) {
        resolve(existing);
        return;
      }

      let timer: ReturnType<typeof setTimeout> | undefined;

      const observer = new MutationObserver(() => {
        const found = context.querySelector(selector);
        if (found) {
          observer.disconnect();
          // Cancel the pending timeout so it does not linger after success.
          if (timer !== undefined) {
            clearTimeout(timer);
          }
          resolve(found);
        }
      });

      // Observe the same root that is queried; watching `document.body`
      // would miss changes when `context` is not attached beneath it.
      observer.observe(context, {
        childList: true,
        subtree: true,
      });

      timer = setTimeout(() => {
        observer.disconnect();
        resolve(null);
      }, timeout);
    });
  }

  /**
   * Turns `url` into an absolute URL.
   * - `http://` / `https://` URLs are returned unchanged.
   * - Protocol-relative URLs (`//host/path`) are prefixed with `https:`.
   * - Everything else is resolved against `window.location.href`.
   */
  private static resolveUrl(url: string): string {
    // Require the full scheme separator: a bare `startsWith('http')` would
    // also treat relative names such as `http-banner.png` as absolute.
    if (/^https?:\/\//i.test(url)) return url;
    if (url.startsWith('//')) return `https:${url}`;
    return new URL(url, window.location.href).href;
  }
}
```

### 2.2 事件监听工具

```typescript
// src/content/utils/eventUtils.ts

/**
 * Static helpers for reacting to page/DOM changes and for dispatching
 * synthetic user events.
 */
export class EventUtils {
  /**
   * Invokes `callback` with the new URL whenever `window.location.href`
   * differs from the last URL that was reported.
   *
   * The URL is re-checked on every DOM mutation under `document.body` and on
   * every `popstate` event. Both triggers share one "last seen URL", so a
   * single navigation is reported exactly once.
   *
   * @param callback - Receives the new `window.location.href`.
   * @returns A function that stops watching (disconnects the observer and
   *          removes the `popstate` listener). Callers may ignore it.
   */
  static onPageChange(callback: (url: string) => void): () => void {
    let lastUrl = window.location.href;

    const notifyIfChanged = (): void => {
      const url = window.location.href;
      if (url !== lastUrl) {
        lastUrl = url;
        callback(url);
      }
    };

    const observer = new MutationObserver(notifyIfChanged);
    observer.observe(document.body, {
      childList: true,
      subtree: true,
    });

    // History navigation (back/forward) may not mutate the DOM immediately,
    // so popstate is watched as well.
    window.addEventListener('popstate', notifyIfChanged);

    return () => {
      observer.disconnect();
      window.removeEventListener('popstate', notifyIfChanged);
    };
  }

  /**
   * Invokes `callback` for every element matching `selector` that is added
   * beneath `document.body` after this call: the added node itself when it
   * matches, followed by any matching descendants of the added node.
   * Elements already present when this is called are not reported.
   *
   * @param selector - CSS selector to match added elements against.
   * @param callback - Receives each newly added matching element.
   * @returns A function that disconnects the observer. Callers may ignore it.
   */
  static onElementAppear(
    selector: string,
    callback: (element: Element) => void
  ): () => void {
    const observer = new MutationObserver((mutations) => {
      for (const mutation of mutations) {
        mutation.addedNodes.forEach((node) => {
          // Text and comment nodes cannot match a selector.
          if (!(node instanceof Element)) {
            return;
          }
          if (node.matches(selector)) {
            callback(node);
          }
          node.querySelectorAll(selector).forEach((match) => callback(match));
        });
      }
    });

    observer.observe(document.body, {
      childList: true,
      subtree: true,
    });

    return () => observer.disconnect();
  }

  /**
   * Dispatches a bubbling, cancelable `click` MouseEvent on `element`.
   */
  static simulateClick(element: Element): void {
    const event = new MouseEvent('click', {
      bubbles: true,
      cancelable: true,
      view: window,
    });
    element.dispatchEvent(event);
  }

  /**
   * Sets `element.value` and then dispatches bubbling `input` and `change`
   * events, in that order, so listeners on the page observe the new value.
   *
   * @param element - Text input or textarea to fill.
   * @param value - Text to assign.
   */
  static simulateInput(element: HTMLInputElement | HTMLTextAreaElement, value: string): void {
    element.value = value;
    element.dispatchEvent(new Event('input', { bubbles: true }));
    element.dispatchEvent(new Event('change', { bubbles: true }));
  }
}
```

---

## 3. 平台特定交互

### 3.1 Amazon 交互

```typescript
// src/content/interactions/amazonInteraction.ts

/**
 * Reads product data from Amazon pages and fills in the listing form.
 * Relies on `DOMUtils`, `EventUtils` and the `ProductData` type, which are
 * defined outside this snippet.
 */
export class AmazonInteraction {
  /**
   * Extracts the product shown on the current page.
   *
   * @returns Title, parsed price, image URLs, description and ASIN read from
   *          the DOM, plus the current page URL.
   */
  async getProductDetails(): Promise<ProductData> {
    const title = DOMUtils.getText('#productTitle');
    const priceText = DOMUtils.getText('.a-price-whole, .a-price .a-offscreen');
    const price = this.parsePrice(priceText);

    const images = DOMUtils.getImageUrls('#landingImage, #imgTagWrapperId img');
    const description = DOMUtils.getText('#feature-bullets ul');
    const asin = DOMUtils.getAttr('[data-asin]', 'data-asin');

    return {
      platform: 'AMAZON',
      asin,
      title,
      price,
      images,
      description,
      url: window.location.href,
    };
  }

  /**
   * Extracts one entry per search-result element on the current page.
   *
   * Result elements whose `data-asin` attribute is missing or empty are
   * skipped, because no product URL can be built for them. Product URLs are
   * built on the current page's origin rather than a hard-coded domain.
   */
  async getSearchResults(): Promise<ProductData[]> {
    const results: ProductData[] = [];
    const items = document.querySelectorAll('[data-component-type="s-search-result"]');
    const origin = window.location.origin;

    items.forEach((item) => {
      const asin = item.getAttribute('data-asin') || '';
      if (!asin) {
        return;
      }

      const title = DOMUtils.getText('h2 a span', item);
      const priceText = DOMUtils.getText('.a-price-whole', item);
      const price = this.parsePrice(priceText);

      results.push({
        platform: 'AMAZON',
        asin,
        title,
        price,
        url: `${origin}/dp/${asin}`,
      });
    });

    return results;
  }

  /**
   * Navigates the current tab to the Seller Central "add inventory" page.
   */
  async navigateToListing(): Promise<void> {
    window.location.href = 'https://sellercentral.amazon.com/inventory/add';
  }

  /**
   * Fills the listing form's `#title`, `#price` and `#description` fields
   * from `product`. Waits up to 10 s for `#title` to appear first; any field
   * that is still missing afterwards is skipped (best effort).
   */
  async fillListingForm(product: ProductData): Promise<void> {
    // Wait for the form to render before touching any field.
    await DOMUtils.waitForElement('#title', 10000);

    this.fillField('#title', product.title);
    this.fillField('#price', product.price.toString());
    this.fillField('#description', product.description);
  }

  /** Writes `value` into the field matching `selector`, if it exists. */
  private fillField(selector: string, value: string): void {
    const field = document.querySelector<HTMLInputElement | HTMLTextAreaElement>(selector);
    if (field) {
      // simulateInput only assigns `.value` and dispatches events, both of
      // which inputs and textareas support; the cast keeps this call valid
      // against a signature that is declared for HTMLInputElement.
      EventUtils.simulateInput(field as HTMLInputElement, value);
    }
  }

  /**
   * Parses the first number found in `priceText`.
   *
   * Commas are treated as thousands separators and `.` as the decimal point
   * (US-style formatting). Only the first number is used, so a range such as
   * `$12.99 - $15.99` yields 12.99.
   *
   * @returns The parsed number, or 0 when the text contains no digits.
   */
  private parsePrice(priceText: string): number {
    const match = priceText.match(/\d[\d,]*(?:\.\d+)?/);
    if (!match) {
      return 0;
    }
    const value = Number.parseFloat(match[0].replace(/,/g, ''));
    return Number.isNaN(value) ? 0 : value;
  }
}
```

### 3.2 eBay 交互

```typescript
// src/content/interactions/ebayInteraction.ts

/**
 * Reads product data from eBay item pages.
 * Relies on `DOMUtils` and the `ProductData` type, which are defined outside
 * this snippet.
 */
export class EbayInteraction {
  /**
   * Extracts the item shown on the current page.
   *
   * @returns Title, parsed price, image URLs and description read from the
   *          DOM, the item id taken from the URL path, and the page URL.
   */
  async getProductDetails(): Promise<ProductData> {
    const title = DOMUtils.getText('h1[data-testid="x-item-title-label"]');
    const priceText = DOMUtils.getText('.notranslate.vi-price .notranslate');
    const price = this.parsePrice(priceText);

    const images = DOMUtils.getImageUrls('#icImg, .vi-image-gallery__image');
    const description = DOMUtils.getText('#desc_wrapper');
    const itemId = this.extractItemId();

    return {
      platform: 'EBAY',
      itemId,
      title,
      price,
      images,
      description,
      url: window.location.href,
    };
  }

  /**
   * Returns the trailing all-digit segment of `window.location.pathname`
   * (a single trailing slash is tolerated), or `''` when there is none.
   */
  private extractItemId(): string {
    const match = window.location.pathname.match(/\/(\d+)\/?$/);
    return match ? match[1] : '';
  }

  /**
   * Parses the first number found in `priceText`.
   *
   * Commas are treated as thousands separators and `.` as the decimal point
   * (US-style formatting). Only the first number is used, so a range such as
   * `US $12.99 to US $15.99` yields 12.99.
   *
   * @returns The parsed number, or 0 when the text contains no digits.
   */
  private parsePrice(priceText: string): number {
    const match = priceText.match(/\d[\d,]*(?:\.\d+)?/);
    if (!match) {
      return 0;
    }
    const value = Number.parseFloat(match[0].replace(/,/g, ''));
    return Number.isNaN(value) ? 0 : value;
  }
}
```

---

## 4. 异常处理

### 4.1 选择器失效处理

```typescript
/**
 * Fallback lookups for when a primary selector stops matching.
 * Relies on `DOMUtils.getText`, which is defined outside this snippet.
 */
export class SelectorFallback {
  /**
   * Tries each selector in order and returns the first non-empty text.
   *
   * @param selectors - Candidate CSS selectors, most preferred first.
   * @param context - Root to search in; defaults to the global `document`.
   * @returns The first non-empty text found, or `''` when every selector
   *          yields nothing (including when `selectors` is empty).
   */
  static trySelectors(selectors: string[], context: Document | Element = document): string {
    for (const selector of selectors) {
      const text = DOMUtils.getText(selector, context);
      if (text) return text;
    }
    return '';
  }

  /**
   * Evaluates `xpath` as a string-valued XPath expression.
   *
   * @param xpath - XPath expression to evaluate.
   * @param context - Context node for the expression; defaults to `document`.
   * @returns The expression's string value, or `''` when it is empty.
   */
  static queryXPath(xpath: string, context: Node = document): string {
    const result = document.evaluate(
      xpath,
      context,
      null,
      XPathResult.STRING_TYPE,
      null
    );
    return result.stringValue || '';
  }
}
```

### 4.2 页面变化检测

```typescript
/**
 * Polls `document.body.innerHTML` on a fixed interval and reports when the
 * markup differs from the previous snapshot.
 */
export class PageChangeDetector {
  /** Markup snapshot taken at the last check (or at `start`). */
  private lastContent = '';
  /** Polling period in milliseconds. */
  private checkInterval = 1000;
  /** Handle of the active polling timer, or `null` when not running. */
  private timerId: ReturnType<typeof setInterval> | null = null;

  /**
   * Starts polling. `callback` is invoked each time the body markup differs
   * from the previous snapshot.
   *
   * The baseline snapshot is taken immediately, so the first tick does not
   * report a change unless the page actually changed after `start`.
   * Calling `start` again replaces the previous timer instead of stacking
   * a second one.
   */
  start(callback: () => void): void {
    this.stop();
    this.lastContent = document.body.innerHTML;

    this.timerId = setInterval(() => {
      const currentContent = document.body.innerHTML;
      if (currentContent !== this.lastContent) {
        this.lastContent = currentContent;
        callback();
      }
    }, this.checkInterval);
  }

  /** Stops polling. Safe to call when the detector is not running. */
  stop(): void {
    if (this.timerId !== null) {
      clearInterval(this.timerId);
      this.timerId = null;
    }
  }
}
```

---

## 5. 相关文档

- [Plugin Design](./Plugin_Design.md)
- [Automation Scripts](./Automation_Scripts.md)

---

*本文档基于实际平台页面结构,最后更新: 2026-03-18*