Files
makemd/docs/04_Plugin/DOM_Interaction.md
wurenzhi 72cd7f6f45 chore: 清理归档文件和文档模板
删除不再需要的归档文件和过时的文档模板,包括多个README、安全策略、前端集成蓝图等文件,同时移除了未使用的业务文档和项目结构文件。

优化项目结构,移除冗余文件,保持代码库整洁。主要删除archive/handover目录下的多个文件及doc目录下的部分文档模板。
2026-03-18 01:21:15 +08:00

11 KiB
Raw Blame History

DOM Interaction (Crawlful Hub Plugin)

定位:Crawlful Hub 插件 DOM 交互文档,描述插件如何与电商平台页面进行 DOM 交互。

更新日期:2026-03-18


1. DOM 选择器策略

1.1 选择器类型

| 类型 | 示例 | 适用场景 |
| --- | --- | --- |
| ID | `#productTitle` | 唯一元素 |
| Class | `.a-price-whole` | 样式类 |
| Attribute | `[data-asin]` | 数据属性 |
| XPath | `//span[@id='price']` | 复杂结构 |
| CSS Selector | `div.product > h1` | 层级关系 |

1.2 平台选择器映射

Amazon

/**
 * CSS selectors for Amazon pages, grouped into `product`, `search` and
 * `seller` sections.
 *
 * A value containing a comma is a CSS selector list: it matches an element
 * that satisfies any of the listed alternatives.
 */
const amazonSelectors = {
  // Fields read from a single product (presumably the product detail page).
  product: {
    title: '#productTitle',
    price: '.a-price-whole, .a-price .a-offscreen',
    listPrice: '.a-text-price .a-offscreen',
    images: '#landingImage, #imgTagWrapperId img',
    description: '#feature-bullets ul, #productDescription',
    brand: '#bylineInfo',
    // Matches ANY element carrying a data-asin attribute, not a specific one.
    asin: '[data-asin]',
    rating: '#acrPopover .a-icon-alt',
    reviewCount: '#acrCustomerReviewText',
    availability: '#availability span',
    category: '#wayfinding-breadcrumbs_container ul',
  },
  // `results` selects the result items; the remaining selectors are
  // presumably meant to be evaluated relative to each item — confirm at call sites.
  search: {
    results: '[data-component-type="s-search-result"]',
    title: 'h2 a span',
    price: '.a-price-whole',
    image: '.s-image',
    rating: '.a-icon-alt',
  },
  // Seller details located under the #merchant-info element.
  seller: {
    name: '#merchant-info a:first-child',
    rating: '#merchant-info .a-icon-alt',
  },
};

eBay

/**
 * CSS selectors for eBay pages, grouped into `product` and `search` sections.
 *
 * A value containing a comma is a CSS selector list: it matches an element
 * that satisfies any of the listed alternatives.
 */
const ebaySelectors = {
  // Fields read from a single item (presumably the item detail page).
  product: {
    title: 'h1[data-testid="x-item-title-label"]',
    price: '.notranslate.vi-price .notranslate',
    images: '#icImg, .vi-image-gallery__image',
    description: '#desc_wrapper, #ds_div',
    condition: '.u-flL.condText',
    seller: '.mbg-nw',
    sellerRating: '.mbg-l .mbg-fb',
    quantity: '#qtyTextBox',
    shipping: '#fshippingCost span',
  },
  // `results` selects the result items; the other selectors all target
  // descendants named with the same `s-item` prefix.
  search: {
    results: '.s-item',
    title: '.s-item__title',
    price: '.s-item__price',
    image: '.s-item__image img',
  },
};

Shopify

/**
 * CSS selectors for Shopify pages, grouped into `product` and `admin`
 * sections.
 *
 * Most product entries pair a class selector with a data-attribute selector
 * in one selector list, so either form of markup matches.
 */
const shopifySelectors = {
  // Fields read from a single product (presumably the storefront product page).
  product: {
    title: 'h1.product-title, h1[data-product-title]',
    price: '.product-price, [data-product-price]',
    comparePrice: '.compare-price, [data-compare-price]',
    images: '.product-image, .product__media img',
    description: '.product-description, [data-product-description]',
    variants: '[data-variant-id]',
    inventory: '[data-inventory]',
    sku: '[data-sku]',
    barcode: '[data-barcode]',
  },
  // Table-based selectors. NOTE(review): `products` and `orders` are the same
  // selector, so they cannot distinguish the two tables on their own.
  admin: {
    products: 'table tbody tr',
    orders: 'table tbody tr',
    // Cell-level selectors, presumably evaluated relative to a row — confirm.
    title: 'td:first-child a',
    status: 'td:nth-child(3) span',
  },
};

2. DOM 操作工具

2.1 元素提取工具

// src/content/utils/domUtils.ts

/**
 * Null-safe helpers for reading text, attributes and image URLs from the DOM.
 *
 * Every lookup accepts an optional `context` so it can be scoped to a
 * sub-tree; when omitted the global `document` is searched.
 */
export class DOMUtils {
  /**
   * Returns the trimmed text content of the first element matching `selector`.
   *
   * @param selector - CSS selector to look up.
   * @param context - Root to search in; defaults to `document`.
   * @returns The trimmed text, or `''` when nothing matches or the element has no text.
   */
  static getText(selector: string, context: Document | Element = document): string {
    const element = context.querySelector(selector);
    return element?.textContent?.trim() ?? '';
  }

  /**
   * Returns an attribute of the first element matching `selector`.
   *
   * @param selector - CSS selector to look up.
   * @param attr - Attribute name to read.
   * @param context - Root to search in; defaults to `document`.
   * @returns The attribute value, or `''` when the element or attribute is missing.
   */
  static getAttr(selector: string, attr: string, context: Document | Element = document): string {
    const element = context.querySelector(selector);
    return element?.getAttribute(attr) ?? '';
  }

  /**
   * Returns the trimmed text of every element matching `selector`.
   *
   * @param selector - CSS selector to look up.
   * @param context - Root to search in; defaults to `document`.
   * @returns One entry per matched element, in match order; elements without
   *   text contribute `''`.
   */
  static getTextList(selector: string, context: Document | Element = document): string[] {
    const elements = context.querySelectorAll(selector);
    return Array.from(elements).map((el) => el.textContent?.trim() ?? '');
  }

  /**
   * Collects absolute image URLs from every element matching `selector`.
   *
   * For each element `src` is used, falling back to `data-src` when `src` is
   * missing or empty. Elements that yield no usable URL are dropped.
   *
   * @param selector - CSS selector for the image elements.
   * @param context - Root to search in; defaults to `document`.
   * @returns Absolute URLs in match order.
   */
  static getImageUrls(selector: string, context: Document | Element = document): string[] {
    const images = context.querySelectorAll(selector);
    return Array.from(images)
      .map((img) => {
        // `||` (not `??`) on purpose: an empty `src` must fall through to `data-src`.
        const src = img.getAttribute('src') || img.getAttribute('data-src');
        // Reference the class by name so the method also works when detached from it.
        return src ? DOMUtils.resolveUrl(src) : '';
      })
      .filter(Boolean);
  }

  /**
   * Waits until an element matching `selector` exists under `context`.
   *
   * Resolves immediately when the element is already present. Otherwise the
   * `context` node itself is observed for child-list changes (including its
   * whole sub-tree) and the promise resolves on the first mutation batch after
   * which the selector matches.
   *
   * @param selector - CSS selector to wait for.
   * @param timeout - Maximum wait in milliseconds (default 10000).
   * @param context - Root to search and observe; defaults to `document`.
   * @returns The matched element, or `null` if the timeout elapses first.
   */
  static waitForElement(
    selector: string,
    timeout: number = 10000,
    context: Document | Element = document
  ): Promise<Element | null> {
    return new Promise((resolve) => {
      const existing = context.querySelector(selector);
      if (existing) {
        resolve(existing);
        return;
      }

      let timer: ReturnType<typeof setTimeout> | undefined;

      const observer = new MutationObserver(() => {
        const found = context.querySelector(selector);
        if (found) {
          observer.disconnect();
          // Cancel the pending timeout so it does not outlive the wait.
          clearTimeout(timer);
          resolve(found);
        }
      });

      // Observe the node that is actually being queried. Watching
      // `document.body` instead would miss changes when `context` lies outside
      // of it and would throw while the body does not exist yet.
      observer.observe(context, {
        childList: true,
        subtree: true,
      });

      timer = setTimeout(() => {
        observer.disconnect();
        resolve(null);
      }, timeout);
    });
  }

  /**
   * Turns a possibly relative URL into an absolute one.
   *
   * - `http://` / `https://` URLs are returned unchanged.
   * - Protocol-relative URLs (`//host/path`) are given the `https:` scheme.
   * - Everything else is resolved against the current page URL.
   *
   * @returns The absolute URL, or `''` when the value cannot be parsed, so a
   *   single malformed attribute does not abort a whole extraction.
   */
  private static resolveUrl(url: string): string {
    // Require the full scheme separator; a bare `http` prefix would also match
    // relative file names such as `httpfoo.png`.
    if (/^https?:\/\//i.test(url)) return url;
    if (url.startsWith('//')) return `https:${url}`;
    try {
      return new URL(url, window.location.href).href;
    } catch {
      return '';
    }
  }
}

2.2 事件监听工具

// src/content/utils/eventUtils.ts

/**
 * Helpers for reacting to page changes and for dispatching synthetic user
 * events from a content script.
 */
export class EventUtils {
  /**
   * Invokes `callback` whenever the page URL changes.
   *
   * The URL is compared after every DOM mutation under `document.body` and on
   * every `popstate` event. Both triggers share one "last seen URL", so a
   * single navigation is reported exactly once even when it causes both a
   * `popstate` event and DOM mutations.
   *
   * @param callback - Receives the new URL.
   * @returns A function that stops the observation and removes the listener.
   */
  static onPageChange(callback: (url: string) => void): () => void {
    let currentUrl = window.location.href;

    const notifyIfChanged = (): void => {
      const url = window.location.href;
      if (url === currentUrl) return;
      currentUrl = url;
      callback(url);
    };

    const observer = new MutationObserver(notifyIfChanged);
    observer.observe(document.body, {
      childList: true,
      subtree: true,
    });

    // History navigation may change the URL without touching the DOM.
    window.addEventListener('popstate', notifyIfChanged);

    return () => {
      observer.disconnect();
      window.removeEventListener('popstate', notifyIfChanged);
    };
  }

  /**
   * Invokes `callback` for every element matching `selector` that is added
   * under `document.body` after this call. Elements already present are not
   * reported.
   *
   * Within one mutation batch each element is reported at most once, even if
   * it is reachable both as an added node and as a descendant of another
   * added node.
   *
   * @param selector - CSS selector the new elements must match.
   * @param callback - Receives each newly added matching element.
   * @returns A function that stops the observation.
   */
  static onElementAppear(
    selector: string,
    callback: (element: Element) => void
  ): () => void {
    const observer = new MutationObserver((mutations) => {
      const reported = new Set<Element>();
      const report = (element: Element): void => {
        if (reported.has(element)) return;
        reported.add(element);
        // Pass the element only, never the index/list arguments of forEach.
        callback(element);
      };

      for (const mutation of mutations) {
        mutation.addedNodes.forEach((node) => {
          // Text and comment nodes cannot match a selector.
          if (!(node instanceof Element)) return;
          if (node.matches(selector)) report(node);
          node.querySelectorAll(selector).forEach((el) => report(el));
        });
      }
    });

    observer.observe(document.body, {
      childList: true,
      subtree: true,
    });

    return () => observer.disconnect();
  }

  /**
   * Dispatches a bubbling, cancelable `click` mouse event on `element`.
   */
  static simulateClick(element: Element): void {
    const event = new MouseEvent('click', {
      bubbles: true,
      cancelable: true,
      view: window,
    });
    element.dispatchEvent(event);
  }

  /**
   * Sets the value of a form field and dispatches bubbling `input` and
   * `change` events, in that order, so listeners on the page observe the edit.
   *
   * @param element - Text input or textarea to fill.
   * @param value - Value to assign.
   */
  static simulateInput(element: HTMLInputElement | HTMLTextAreaElement, value: string): void {
    element.value = value;
    element.dispatchEvent(new Event('input', { bubbles: true }));
    element.dispatchEvent(new Event('change', { bubbles: true }));
  }
}

3. 平台特定交互

3.1 Amazon 交互

// src/content/interactions/amazonInteraction.ts

/**
 * Reads product data from Amazon pages and fills the listing form.
 */
export class AmazonInteraction {
  /**
   * Extracts the product shown on the current page.
   *
   * @returns Product data with `platform` set to `'AMAZON'`; fields that
   *   cannot be found are empty (`''`, `0` or `[]`).
   */
  async getProductDetails(): Promise<ProductData> {
    const title = DOMUtils.getText('#productTitle');
    const priceText = DOMUtils.getText('.a-price-whole, .a-price .a-offscreen');
    const images = DOMUtils.getImageUrls('#landingImage, #imgTagWrapperId img');
    const description = DOMUtils.getText('#feature-bullets ul');

    return {
      platform: 'AMAZON',
      asin: this.extractAsin(),
      title,
      price: this.parsePrice(priceText),
      images,
      description,
      url: window.location.href,
    };
  }

  /**
   * Extracts every item of the search result list on the current page.
   *
   * @returns One entry per result element, in document order. The `url` is
   *   built as `<page origin>/dp/<asin>` so links stay on the marketplace the
   *   search was run on.
   */
  async getSearchResults(): Promise<ProductData[]> {
    const items = document.querySelectorAll('[data-component-type="s-search-result"]');
    const origin = window.location.origin;

    return Array.from(items).map((item): ProductData => {
      const asin = item.getAttribute('data-asin') ?? '';
      return {
        platform: 'AMAZON',
        asin,
        title: DOMUtils.getText('h2 a span', item),
        price: this.parsePrice(DOMUtils.getText('.a-price-whole', item)),
        url: `${origin}/dp/${asin}`,
      };
    });
  }

  /**
   * Navigates the current tab to the Seller Central "add inventory" page.
   */
  async navigateToListing(): Promise<void> {
    window.location.href = 'https://sellercentral.amazon.com/inventory/add';
  }

  /**
   * Fills the title, price and description fields of the listing form.
   *
   * Waits up to 10 seconds for `#title` to appear, then fills every field
   * that exists; missing fields are skipped silently (best effort). The
   * description field is left untouched when the product has no description.
   *
   * @param product - Product whose data is written into the form.
   */
  async fillListingForm(product: ProductData): Promise<void> {
    // Wait for the form to render before looking up its fields.
    await DOMUtils.waitForElement('#title', 10000);

    const titleInput = document.querySelector<HTMLInputElement>('#title');
    if (titleInput) {
      EventUtils.simulateInput(titleInput, product.title);
    }

    const priceInput = document.querySelector<HTMLInputElement>('#price');
    if (priceInput) {
      EventUtils.simulateInput(priceInput, product.price.toString());
    }

    // Search results carry no description, so it may be absent here.
    const descInput = document.querySelector<HTMLTextAreaElement>('#description');
    if (descInput && product.description != null) {
      EventUtils.simulateInput(descInput, product.description);
    }
  }

  /**
   * Determines the ASIN of the product on the current page.
   *
   * Prefers the `/dp/<ASIN>` path segment because the first `[data-asin]`
   * element in the document may be empty or belong to another product.
   * Assumes ASINs are 10 upper-case alphanumeric characters.
   *
   * @returns The ASIN, or `''` when neither the URL nor the DOM provides one.
   */
  private extractAsin(): string {
    const fromUrl = window.location.pathname.match(/\/dp\/([A-Z0-9]{10})(?:\/|$)/);
    if (fromUrl) return fromUrl[1];
    return DOMUtils.getAttr('[data-asin]', 'data-asin');
  }

  /**
   * Parses the first number in a price text.
   *
   * Commas are treated as thousands separators and removed; currency symbols
   * and any other surrounding text are ignored. Only the first number is
   * read, so a range such as `"$10.99 - $15.99"` yields `10.99`.
   *
   * @returns The parsed price, or `0` when the text contains no digits.
   */
  private parsePrice(priceText: string): number {
    const match = priceText.replace(/,/g, '').match(/\d+(?:\.\d+)?/);
    return match ? parseFloat(match[0]) : 0;
  }
}

3.2 eBay 交互

// src/content/interactions/ebayInteraction.ts

/**
 * Reads product data from eBay item pages.
 */
export class EbayInteraction {
  /**
   * Extracts the item shown on the current page.
   *
   * @returns Product data with `platform` set to `'EBAY'`; fields that cannot
   *   be found are empty (`''`, `0` or `[]`).
   */
  async getProductDetails(): Promise<ProductData> {
    const title = DOMUtils.getText('h1[data-testid="x-item-title-label"]');
    const priceText = DOMUtils.getText('.notranslate.vi-price .notranslate');
    const images = DOMUtils.getImageUrls('#icImg, .vi-image-gallery__image');
    const description = DOMUtils.getText('#desc_wrapper');

    return {
      platform: 'EBAY',
      itemId: this.extractItemId(),
      title,
      price: this.parsePrice(priceText),
      images,
      description,
      url: window.location.href,
    };
  }

  /**
   * Reads the item id from the last, all-digit segment of the page path.
   * A trailing slash after that segment is tolerated.
   *
   * @returns The item id, or `''` when the path does not end in a numeric segment.
   */
  private extractItemId(): string {
    const match = window.location.pathname.match(/\/(\d+)\/?$/);
    return match ? match[1] : '';
  }

  /**
   * Parses the first number in a price text.
   *
   * Commas are treated as thousands separators and removed; currency symbols
   * and any other surrounding text are ignored. Only the first number is
   * read, so a range such as `"US $10.99 to US $15.99"` yields `10.99`.
   *
   * @returns The parsed price, or `0` when the text contains no digits.
   */
  private parsePrice(priceText: string): number {
    const match = priceText.replace(/,/g, '').match(/\d+(?:\.\d+)?/);
    return match ? parseFloat(match[0]) : 0;
  }
}

4. 异常处理

4.1 选择器失效处理

/**
 * Fallback strategies for when a primary selector no longer matches.
 */
export class SelectorFallback {
  /**
   * Tries the selectors in order and returns the first non-empty text.
   *
   * @param selectors - CSS selectors, most preferred first.
   * @param context - Root to search in; defaults to `document`.
   * @returns The trimmed text of the first selector that yields any, or `''`
   *   when none does (including an empty selector list).
   */
  static trySelectors(
    selectors: readonly string[],
    context: Document | Element = document
  ): string {
    for (const selector of selectors) {
      const text = DOMUtils.getText(selector, context);
      if (text) return text;
    }
    return '';
  }

  /**
   * Evaluates an XPath expression as a string.
   *
   * The result is trimmed so that, like the selector-based lookup above, a
   * whitespace-only match counts as "nothing found".
   *
   * @param xpath - XPath expression to evaluate.
   * @param context - Node the expression is evaluated relative to; defaults
   *   to `document`.
   * @returns The trimmed string value, or `''` when nothing matches.
   */
  static queryXPath(xpath: string, context: Node = document): string {
    const result = document.evaluate(
      xpath,
      context,
      null,
      XPathResult.STRING_TYPE,
      null
    );
    return (result.stringValue || '').trim();
  }
}

4.2 页面变化检测

/**
 * Polls `document.body.innerHTML` and reports when it differs from the
 * previously seen markup.
 */
export class PageChangeDetector {
  /** Markup seen on the most recent tick; `''` before the first tick. */
  private lastContent = '';
  /** Handle of the running poll timer, if any. */
  private timerId: ReturnType<typeof setInterval> | undefined;

  /**
   * @param checkInterval - Polling period in milliseconds (default 1000).
   */
  constructor(private readonly checkInterval: number = 1000) {}

  /**
   * Starts polling and calls `callback` on every tick where the body markup
   * differs from the last one seen.
   *
   * Because the initial reference is the empty string, the first tick reports
   * a change whenever the body is not empty. Calling `start` while already
   * running replaces the previous timer instead of stacking a second one.
   *
   * @param callback - Invoked after a change is detected.
   */
  start(callback: () => void): void {
    // Never run two timers against the same `lastContent`.
    this.stop();

    this.timerId = setInterval(() => {
      const currentContent = document.body.innerHTML;
      if (currentContent !== this.lastContent) {
        this.lastContent = currentContent;
        callback();
      }
    }, this.checkInterval);
  }

  /**
   * Stops polling. Safe to call when the detector is not running.
   */
  stop(): void {
    if (this.timerId !== undefined) {
      clearInterval(this.timerId);
      this.timerId = undefined;
    }
  }
}

5. 相关文档


本文档基于实际平台页面结构,最后更新: 2026-03-18