8
n8n 中文网amn8n.com

Decodo爬虫API工作流模板(n8n自动化亚马逊图书购买报告)

高级

这是一个 AI Summarization、Multimodal AI 领域的自动化工作流,包含 22 个节点,主要使用 Set、Code、Slack、GoogleDrive、HttpRequest 等节点。使用 Decodo 爬虫和 GPT-4.1 mini 提取亚马逊图书数据并生成购买报告。

前置要求
  • Slack Bot Token 或 Webhook URL
  • Google Drive API 凭证
  • 可能需要目标 API 的认证凭证
  • OpenAI API Key
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "26kLwujfcYNYROSS",
  "meta": {
    "instanceId": "4a2e6764ba7a6bc9890d9225f4b21d570ce88fc9bd57549c89057fcee58fed0f",
    "templateCredsSetupCompleted": true
  },
  "name": "Decodo Scraper API Workflow Template (n8n Automation Amazon Book Purchase Report)",
  "tags": [
    {
      "id": "LAIfxZMd3ZR7hJo6",
      "name": "Decodo",
      "createdAt": "2025-09-02T04:14:42.812Z",
      "updatedAt": "2025-09-02T04:14:42.812Z"
    }
  ],
  "nodes": [
    {
      "id": "e2a6dee1-55c0-4407-841b-6bbbfddebc86",
      "name": "When clicking ‘Execute workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        0,
        -16
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "6f8cea7d-d8f3-456f-81cd-6213b95e2d5d",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        928,
        208
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "PPSwAKeLQYgAPobT",
          "name": "OpenAi account"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "db22753b-8bc1-468c-9705-61067a63bae2",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        1056,
        208
      ],
      "parameters": {
        "jsonSchemaExample": "[{\n  \"asin\": \"0399501487\",\n  \"title\": \"Lord of the Flies\",\n  \"author\": \"William Golding\",\n  \"rank\": 50,\n  \"category\": \"Literature & Fiction\",\n  \"sub_category\": \"Classics\",\n  \"rating\": 4.6,\n  \"ratings_count\": 25600,\n  \"price\": {\n    \"currency\": \"USD\",\n    \"amount\": 9.99,\n    \"format\": \"Paperback\"\n  },\n  \"url\": \"https://www.amazon.com/dp/0399501487\",\n  \"publisher\": \"Penguin\",\n  \"publication_date\": \"1959-04-15\",\n  \"language\": \"English\",\n  \"pages\": 224\n}\n]"
      },
      "typeVersion": 1.3
    },
    {
      "id": "158b0532-e82f-4044-b0a5-84b1b0ea7ffe",
      "name": "Create document file",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1744,
        -16
      ],
      "parameters": {
        "url": "https://www.googleapis.com/upload/drive/v3/files",
        "body": "=--foo_bar_baz\nContent-Type: application/json; charset=UTF-8\n\n{\n  \"name\": \"{{ $json.Today }}\",\n  \"mimeType\": \"application/vnd.google-apps.document\",\n  \"parents\": [\"{{ $json['Drive Folder ID'] }}\"]\n}\n\n--foo_bar_baz\nContent-Type: text/markdown; charset=UTF-8\n\n{{ $('Build 📚 Book Purchase Report').item.json.markdown }}\n\n--foo_bar_baz--",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendQuery": true,
        "contentType": "raw",
        "sendHeaders": true,
        "authentication": "predefinedCredentialType",
        "rawContentType": "multipart/related; boundary=foo_bar_baz",
        "queryParameters": {
          "parameters": [
            {
              "name": "uploadType",
              "value": "multipart"
            },
            {
              "name": "supportsAllDrives",
              "value": "true"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "boundary",
              "value": "foo_bar_baz"
            }
          ]
        },
        "nodeCredentialType": "googleDriveOAuth2Api"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "fC471es5gk5Mm900",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "fc816728-71ab-4e0e-948f-b49852644086",
      "name": "Convert document to PDF",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1968,
        -16
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $json.id }}"
        },
        "options": {
          "googleFileConversion": {
            "conversion": {
              "docsToFormat": "application/pdf"
            }
          }
        },
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "fC471es5gk5Mm900",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "47b8d51c-5d4f-453c-a562-e1a3f0c94715",
      "name": "Configure Google Drive Folder ",
      "type": "n8n-nodes-base.set",
      "position": [
        1520,
        -16
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "1ff0b9a4-7d60-44ec-b047-e49252f1ace9",
              "name": "Drive Folder ID",
              "type": "string",
              "value": "1IPcko8bzogO3W4mxhrW2Q017QA0Lc5MI"
            },
            {
              "id": "d64a1ac4-15db-4c84-a1db-fbd6b48084f5",
              "name": "Today",
              "type": "string",
              "value": "={{ $now.format(\"ddMMyyyyHHmmss\") }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
      "name": "Product Analyzer Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        896,
        -16
      ],
      "parameters": {
        "text": "=Get top 10 best selling books from the below web content:\n{{ $json.text }}",
        "options": {
          "systemMessage": "You are a helpful assistant to parse the HTML content and output as well-structured JSON"
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 2.1
    },
    {
      "id": "0b6859b7-220e-432c-be18-a0931d15108d",
      "name": "Scraper API Request",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        448,
        -16
      ],
      "parameters": {
        "url": "https://scraper-api.decodo.com/v2/scrape",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "url",
              "value": "={{ $json.url }}"
            },
            {
              "name": "device_type",
              "value": "desktop"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "Accept",
              "value": "application/json"
            },
            {
              "name": "Authorization",
              "value": "={{ $json.Authenticate_Token }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "6e74af84-70a1-4184-8ded-0592d40f1587",
      "name": "Edit Fields",
      "type": "n8n-nodes-base.set",
      "position": [
        224,
        -16
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "391aaecd-88c0-4943-9417-2d9fc6bc50b9",
              "name": "Authenticate_Token",
              "type": "string",
              "value": "Get token from your Decodo dashboard (https://dashboard.decodo.com/web-scraping-api/scraper)"
            },
            {
              "id": "859e5162-ef18-454a-9819-c1b0f2800b3f",
              "name": "url",
              "type": "string",
              "value": "https://www.amazon.com/Best-Sellers-Books/zgbs/books"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "4416d72c-ac02-46ca-82d7-c896d7a30c7d",
      "name": "HTML Response Parser",
      "type": "n8n-nodes-base.code",
      "position": [
        672,
        -16
      ],
      "parameters": {
        "jsCode": "// n8n Code node (JavaScript)\n// Input:  $input.first().json.results[0].content\n// Output: clean plain text (no HTML/JS/CSS, minimal \\n)\n\nfunction stripAll(html) {\n  if (typeof html !== 'string') return '';\n\n  // Remove scripts, styles, head, comments, svg, noscript, canvas\n  html = html.replace(/<script[\\s\\S]*?<\\/script>/gi, '');\n  html = html.replace(/<style[\\s\\S]*?<\\/style>/gi, '');\n  html = html.replace(/<head[\\s\\S]*?<\\/head>/gi, '');\n  html = html.replace(/<noscript[\\s\\S]*?<\\/noscript>/gi, '');\n  html = html.replace(/<svg[\\s\\S]*?<\\/svg>/gi, '');\n  html = html.replace(/<canvas[\\s\\S]*?<\\/canvas>/gi, '');\n  html = html.replace(/<!--[\\s\\S]*?-->/g, '');\n\n  // Replace block-level tags with a single newline\n  const blockTags = [\n    'p','div','section','article','header','footer','nav','aside','main',\n    'h1','h2','h3','h4','h5','h6','ul','ol','li','table','tr','td','th','br','hr'\n  ];\n  for (const tag of blockTags) {\n    const rxOpen  = new RegExp(`<${tag}[^>]*>`, 'gi');\n    const rxClose = new RegExp(`</${tag}>`, 'gi');\n    html = html.replace(rxOpen, '\\n');\n    html = html.replace(rxClose, '\\n');\n  }\n\n  // Strip all remaining tags\n  let text = html.replace(/<\\/?[^>]+>/g, '');\n\n  // Decode common HTML entities (decode &amp; last so &amp;lt; is not decoded twice)\n  text = text\n    .replace(/&nbsp;/g, ' ')\n    .replace(/&lt;/g, '<')\n    .replace(/&gt;/g, '>')\n    .replace(/&quot;/g, '\"')\n    .replace(/&#39;/g, \"'\")\n    .replace(/&amp;/g, '&');\n\n  // Clean whitespace, keeping line breaks so adjacent fields stay separated\n  text = text\n    .replace(/\\r/g, '')\n    .replace(/[ \\t]+/g, ' ')                // collapse spaces/tabs\n    .replace(/[ \\t]*\\n[ \\t]*/g, '\\n')      // trim spaces around newlines\n    .replace(/\\n{3,}/g, '\\n\\n')            // collapse 3+ newlines into 2\n    .trim();\n  \n  return text;\n}\n\n// MAIN\nconst html = $input.first().json?.results?.[0]?.content || '';\nif (!html) {\n  return [{ json: { error: 'No HTML found at json.results[0].content' } }];\n}\n\nconst text = stripAll(html);\n\nreturn [{\n  json: {\n    text,\n    chars: text.length\n  }\n}];"
      },
      "typeVersion": 2
    },
    {
      "id": "36cc67cd-75a8-442b-be19-18c6883c0509",
      "name": "Upload report to Slack ",
      "type": "n8n-nodes-base.slack",
      "position": [
        2192,
        -16
      ],
      "webhookId": "4f70c69a-8b0e-4446-973c-f14e6479fcae",
      "parameters": {
        "options": {
          "fileName": "=Book Purchase Report {{ $today.format('yyyy-MM-dd') }}",
          "channelId": "C0989EJ7Z6K",
          "initialComment": "📚 Book Purchase Report"
        },
        "resource": "file",
        "authentication": "oAuth2"
      },
      "credentials": {
        "slackOAuth2Api": {
          "id": "4JSKt9sIRV1KGswQ",
          "name": "Slack account"
        }
      },
      "typeVersion": 2.3
    },
    {
      "id": "85e63b80-bd08-4c1f-8677-d4c87274ddd4",
      "name": "Build 📚 Book Purchase Report",
      "type": "n8n-nodes-base.code",
      "position": [
        1296,
        -16
      ],
      "parameters": {
        "jsCode": "// n8n Code node (JavaScript)\n// Input shape expected:\n// items[0].json.output = [ { title, author, rank, category, sub_category, rating, ratings_count, price:{currency, amount, format}, ... }, ... ]\n\nfunction median(nums) {\n  if (!nums.length) return 0;\n  const arr = [...nums].sort((a, b) => a - b);\n  const mid = Math.floor(arr.length / 2);\n  return arr.length % 2 ? arr[mid] : (arr[mid - 1] + arr[mid]) / 2;\n}\n\nfunction sum(nums) {\n  return nums.reduce((a, b) => a + (Number.isFinite(b) ? b : 0), 0);\n}\n\nfunction mean(nums) {\n  const valid = nums.filter(n => Number.isFinite(n));\n  return valid.length ? sum(valid) / valid.length : 0;\n}\n\nfunction fmtMoney(n, currency = \"USD\") {\n  if (!Number.isFinite(n)) n = 0;\n  try {\n    return new Intl.NumberFormat(\"en-US\", { style: \"currency\", currency }).format(n);\n  } catch {\n    return `$${n.toFixed(2)}`;\n  }\n}\n\nfunction pad(str, len) {\n  return (str + '').padEnd(len, ' ');\n}\n\nfunction repeat(s, n) {\n  return Array.from({ length: n }, () => s).join('');\n}\n\nfunction safe(v, d='') { return (v === null || v === undefined) ? d : v; }\n\nfunction asBar(value, max, width = 20) {\n  if (max <= 0) return '';\n  const filled = Math.round((value / max) * width);\n  return repeat('▉', filled);\n}\n\nconst now = new Date();\nconst dateStr = now.toLocaleString('en-US', { year:'numeric', month:'short', day:'2-digit' });\n\nconst inp = $input.all();\nconst books = (inp?.[0]?.json?.output && Array.isArray(inp[0].json.output))\n  ? inp[0].json.output\n  : Array.isArray(inp?.[0]?.json) ? inp[0].json : [];\n\nconst clean = books.map(b => ({\n  title: safe(b.title, 'N/A'),\n  author: safe(b.author, 'Unknown'),\n  rank: Number.isFinite(b.rank) ? b.rank : null,\n  category: safe(b.category, 'N/A'),\n  sub_category: safe(b.sub_category, 'N/A'),\n  rating: Number.isFinite(b.rating) ? b.rating : 0,\n  ratings_count: Number.isFinite(b.ratings_count) ? b.ratings_count : 0,\n  price_amount: Number.isFinite(b?.price?.amount) ? b.price.amount : 0,\n  price_currency: safe(b?.price?.currency, 'USD'),\n  price_format: safe(b?.price?.format, 'Unknown'),\n}));\n\n// --- Core metrics ---\nconst totalBooks = clean.length;\nconst currency = clean.find(b => b.price_currency)?.price_currency || 'USD';\nconst prices = clean.map(b => b.price_amount).filter(n => Number.isFinite(n) && n >= 0);\nconst totalSpend = sum(prices);\nconst avgPrice = mean(prices);\nconst medPrice = median(prices);\nconst minPrice = prices.length ? Math.min(...prices) : 0;\nconst maxPrice = prices.length ? Math.max(...prices) : 0;\n\nconst rated = clean.filter(b => b.rating > 0);\nconst avgRating = rated.length ? mean(rated.map(b => b.rating)) : 0;\nconst ratingCoverage = rated.length;\nconst unratedCount = totalBooks - ratingCoverage;\n\n// category/subcategory counts\nconst catCount = {};\nconst subCatCount = {};\nfor (const b of clean) {\n  catCount[b.category] = (catCount[b.category] || 0) + 1;\n  subCatCount[b.sub_category] = (subCatCount[b.sub_category] || 0) + 1;\n}\n\n// formats\nconst formatCount = {};\nfor (const b of clean) {\n  formatCount[b.price_format] = (formatCount[b.price_format] || 0) + 1;\n}\n\n// top by popularity (ratings_count) and by rating (>=50 ratings)\nconst topByRatingsCount = [...clean].sort((a,b) => b.ratings_count - a.ratings_count).slice(0,5);\nconst topByRating = [...clean]\n  .filter(b => b.ratings_count >= 50 && b.rating > 0)\n  .sort((a,b) => b.rating - a.rating)\n  .slice(0,5);\n\n// price histogram (simple buckets)\nconst bucketSize = 5; // $5 buckets\nconst buckets = {};\nfor (const p of prices) {\n  const b = Math.floor(p / bucketSize) * bucketSize;\n  const label = `${fmtMoney(b, currency)}–${fmtMoney(b + bucketSize - 0.01, currency)}`;\n  buckets[label] = (buckets[label] || 0) + 1;\n}\nconst bucketEntries = Object.entries(buckets).sort((a,b) => {\n  // sort by numeric lower bound: parse only the part before the en dash,\n  // otherwise both bounds are concatenated (\"5.009.99\") and the key is NaN\n  const n = s => parseFloat(s[0].split('–')[0].replace(/[^0-9.]/g, ''));\n  return n(a) - n(b);\n});\nconst maxBucket = bucketEntries.length ? Math.max(...bucketEntries.map(([_, v]) => v)) : 0;\n\n// category table (top 5)\nconst topCats = Object.entries(catCount).sort((a,b) => b[1]-a[1]).slice(0,5);\nconst topSubCats = Object.entries(subCatCount).sort((a,b) => b[1]-a[1]).slice(0,5);\nconst topFormats = Object.entries(formatCount).sort((a,b) => b[1]-a[1]).slice(0,5);\n\n// build tables\nfunction makeKpiRow(label, value) {\n  return `| ${label} | ${value} |`;\n}\n\nfunction tableHeader(cols) {\n  return `| ${cols.join(' | ')} |\\n| ${cols.map(()=>'---').join(' | ')} |`;\n}\n\nfunction bookRow(b) {\n  const title = b.title.length > 72 ? b.title.slice(0,69) + '…' : b.title;\n  return `| ${safe(b.rank,'–')} | ${title} | ${b.author} | ${fmtMoney(b.price_amount, b.price_currency)} | ${b.rating || '–'} | ${b.ratings_count} |`;\n}\n\nconst kpiTable = [\n  tableHeader(['Metric','Value']),\n  makeKpiRow('Total Books', totalBooks),\n  makeKpiRow('Total Spend (list prices)', fmtMoney(totalSpend, currency)),\n  makeKpiRow('Avg Price', fmtMoney(avgPrice, currency)),\n  makeKpiRow('Median Price', fmtMoney(medPrice, currency)),\n  makeKpiRow('Price Range', `${fmtMoney(minPrice, currency)} – ${fmtMoney(maxPrice, currency)}`),\n  makeKpiRow('Avg Rating (rated only)', rated.length ? avgRating.toFixed(2) : '–'),\n  makeKpiRow('Rated Titles', `${ratingCoverage}/${totalBooks}`),\n].join('\\n');\n\nconst popularityTable = [\n  tableHeader(['Rank','Title','Author','Price','Rating','#Ratings']),\n  ...topByRatingsCount.map(bookRow),\n].join('\\n');\n\nconst qualityTable = [\n  tableHeader(['Rank','Title','Author','Price','Rating','#Ratings']),\n  ...topByRating.map(bookRow),\n].join('\\n');\n\nfunction kvTable(title, entries) {\n  const rows = entries.map(([k,v]) => `| ${k} | ${v} |`).join('\\n');\n  return `**${title}**\\n\\n${tableHeader(['Key','Count'])}\\n${rows}`;\n}\n\nconst catTable = kvTable('Top Categories', topCats);\nconst subCatTable = kvTable('Top Sub-Categories', topSubCats);\nconst fmtTable = kvTable('Formats', topFormats);\n\n// histogram block\nconst histLines = bucketEntries.map(([label, count]) => {\n  const bar = asBar(count, maxBucket, 24);\n  return `- ${pad(label, 22)} | ${pad(count, 3)} ${bar}`;\n}).join('\\n');\n\nconst highlights = [];\nif (topByRatingsCount[0]) {\n  highlights.push(`**Most talked-about**: _${topByRatingsCount[0].title}_ (${topByRatingsCount[0].ratings_count.toLocaleString()} ratings).`);\n}\nif (topByRating[0]) {\n  highlights.push(`**Highest rated (≥50 ratings)**: _${topByRating[0].title}_ (${topByRating[0].rating.toFixed(1)}★).`);\n}\nif (topFormats[0]) {\n  highlights.push(`**Preferred format**: ${topFormats[0][0]} (${topFormats[0][1]} titles).`);\n}\nif (topCats[0]) {\n  highlights.push(`**Top category**: ${topCats[0][0]} (${topCats[0][1]} titles).`);\n}\n\nconst recs = [];\nif (unratedCount > 0) {\n  recs.push(`Fill in missing ratings for ${unratedCount} titles to improve quality insights.`);\n}\nif (prices.some(p => p === 0)) {\n  const zeroPrices = prices.filter(p => p === 0).length;\n  recs.push(`Assign prices to ${zeroPrices} title(s) with \\$0 to avoid skewing spend/price stats.`);\n}\nif (clean.some(b => !b.rank)) {\n  recs.push(`Confirm ranks for all titles to refine top-N comparisons.`);\n}\n\nconst markdown = `\n# 📚 Book Purchase Report\n**Date:** ${dateStr}\n\n> A quick, readable snapshot of your current book list with pricing, ratings, and category insights.\n\n---\n\n## 🔎 Executive KPIs\n${kpiTable}\n\n---\n\n## ⭐ Most Popular (by #Ratings)\n${popularityTable}\n\n---\n\n## 🏆 Highest Rated (≥ 50 ratings)\n${qualityTable}\n\n---\n\n## 🧩 Breakdown\n${catTable}\n\n${subCatTable}\n\n${fmtTable}\n\n---\n\n## 💲 Price Distribution (bucket = \\$${bucketSize})\n\\`\\`\\`\n${histLines || 'No price data available.'}\n\\`\\`\\`\n\n---\n\n## ✨ Highlights\n${highlights.length ? highlights.map(x => `- ${x}`).join('\\n') : '- No highlights available yet.'}\n\n---\n\n## ✅ Suggestions\n${(recs.length ? recs : ['All good!']).map(x => `- ${x}`).join('\\n')}\n`.trim();\n\n// Also expose structured stats if you want to reuse downstream\nconst stats = {\n  totalBooks,\n  currency,\n  price: {\n    totalSpend,\n    avgPrice,\n    medianPrice: medPrice,\n    minPrice,\n    maxPrice,\n  },\n  ratings: {\n    avgRating: rated.length ? Number(avgRating.toFixed(3)) : null,\n    ratedCount: ratingCoverage,\n    unratedCount,\n  },\n  counts: {\n    byCategory: catCount,\n    bySubCategory: subCatCount,\n    byFormat: formatCount,\n  },\n};\n\nreturn [\n  {\n    json: {\n      markdown,\n      stats,\n    }\n  }\n];"
      },
      "typeVersion": 2
    },
    {
      "id": "545fa265-c575-4b59-9919-6556e27b19c3",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1152,
        -752
      ],
      "parameters": {
        "width": 1040,
        "height": 2672,
        "content": "# Decodo Scraper API Workflow Template (n8n Automation Amazon Book Purchase Report)\n![](https://s3.ap-southeast-1.amazonaws.com/automatewith.me/how-to-use-scraper-api-with-n8n.jpg)\n> This workflow demos how to use **Decodo Scraper API** to crawl any public web page (headless JS, device emulation: mobile/desktop/tablet), extract structured product data from the returned HTML, generate a **purchase-ready report**, and automatically deliver it as a **Google Doc + PDF** to Slack/Drive.\n## Who’s it for\n- **Creators / Analysts** who need quick product lists (books, gadgets, etc.) with prices/ratings.\n- **Ops & Marketing teams** building weekly “top picks” reports.\n- **Engineers** validating the Decodo Scraper API + LLM extraction pattern before scaling.\n\n## How it works / What it does\n\n1. **Trigger** – Manually run the workflow.\n2. **Edit Fields (manual)** – Provide inputs:\n   - `targetUrl` (e.g., an Amazon category/search/listing page)\n   - `deviceType` (`desktop` | `mobile` | `tablet`)\n   - Optional: `maxItems`, `notes`, `reportTitle`, `reportOwner`\n3. **Scraper API Request (HTTP Request → POST)**  \n   Calls **Decodo Scraper API** with:\n   - URL to crawl, **headless JS** enabled\n   - **Device emulation** (UA + viewport)\n   - Optional **waitFor / executeJS** to ensure late-loading content is captured\n4. **HTML Response Parser (Code/Function or HTML node)**  \n   Pulls the HTML string from Decodo response and normalizes it (strip scripts/styles, collapse whitespace).\n5. **Product Analyzer Agent (LLM + Structured Output Parser)**  \n   Prompts an LLM to extract **structured “book” objects** from the HTML:\n   The **Structured Output Parser** enforces a strict JSON schema and drops malformed items.\n6. **Build 📚 Book Purchase Report (Code/LLM)**  \n   Converts the JSON array into a **Markdown** (or HTML) report with:\n   - Executive summary (top picks, average price/rating)\n   - Table of items (rank, title, author, price, rating, link)\n   - “Recommended to buy” shortlist (rules configurable)\n   - Notes / owner / timestamp\n7. **Configure Google Drive Folder (manual)**  \n   Choose/create a Drive folder for output artifacts.\n8. **Create Document File (Google Docs API)**  \n   Creates a Doc from the generated Markdown/HTML.\n9. **Convert Document to PDF (Google Drive export)**  \n   Exports the Doc to PDF.\n10. **Upload report to Slack**  \n   Sends the PDF (and/or Doc link) to a chosen Slack channel with a short summary.\n\n## How to set up\n\n### 1 Prerequisites\n- **n8n** (self-hosted or Cloud)\n- **Decodo Scraper API** key\n- **OpenAI (or compatible) API key** for the Analyzer Agent\n- **Google Drive/Docs** credentials (OAuth2)\n- **Slack** Bot/User token (files:write, chat:write)\n\n### 2 Environment variables (recommended)\n- `DECODO_API_KEY`\n- `OPENAI_API_KEY`\n- `DRIVE_FOLDER_ID` (optional default)\n- `SLACK_CHANNEL_ID`\n\n### 3 Nodes configuration (high level)\n**Edit Fields (Set node)**\n**Scraper API Request (HTTP Request → POST)**\n**HTML Response Parser (Code node)**\n**Product Analyzer Agent**\n**Build Book Purchase Report (Code/LLM)**\n**Create Document File**\n**Convert to PDF**\n**Upload to Slack**\n\n## Requirements\n\n- **Decodo**: Active API key and endpoint access. Be mindful of concurrency/rate limits.\n- **Model**: GPT-4o/4.1-mini or similar for reliable structured extraction.\n- **Google**: OAuth client (Docs/Drive scopes). Ensure n8n can write to the target folder.\n- **Slack**: Bot token with `files:write` + `chat:write`.\n\n## How to customize the workflow\n\n- **Target site**: Change `targetUrl` to any **public** page (category, search, or listing).  \n  For other domains (not Amazon), tweak the **LLM guidance** (e.g., price/label patterns).\n- **Device emulation**: Switch `deviceType` to `mobile` to fetch mobile-optimized markup (often simpler DOMs).\n- **Late-loading pages**: Adjust `waitFor.selector` or use `waitUntil: \"networkidle\"` (if supported) to ensure full content loads.\n- **Client-side JS**: Extend `executeJS` if you need to interact (scroll, click “next”, expand sections). You can also loop over pagination by iterating URLs.\n- **Extraction schema**: Add fields (e.g., `discount_percent`, `bestseller_badge`, `prime_eligible`) and update the Structured Output schema accordingly.\n- **Filtering rules**: Modify recommendation logic (e.g., min ratings count, price bands, languages).\n- **Report branding**: Add logo, cover page, footer with company info; switch to HTML + inline CSS for richer Docs formatting.\n- **Destinations**: Besides Slack & Drive, add Email, Notion, Confluence, or a database sink.\n- **Scheduling**: Add a **Cron** trigger for weekly/monthly auto-reports."
      },
      "typeVersion": 1
    },
    {
      "id": "1fa69470-0a75-45b4-8701-58c384d35a42",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -32,
        -208
      ],
      "parameters": {
        "color": 5,
        "width": 272,
        "content": "### 1. Trigger Workflow Execution  \nThe workflow starts manually by clicking **Execute workflow**. This allows users to control when the Amazon book data scraping and report generation begins.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "bb34c2ae-5133-4465-8955-87804b5a1ce8",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        128,
        176
      ],
      "parameters": {
        "color": 5,
        "width": 272,
        "content": "### 2. Edit Input Fields  \nSet the required fields such as `targetUrl` (Amazon book listing page), `deviceType` (desktop or mobile), and report details (title, owner, notes). These values define the scope and context of the report."
      },
      "typeVersion": 1
    },
    {
      "id": "5010d9c6-502b-4b71-b43a-e81b9f9445f9",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        352,
        -208
      ],
      "parameters": {
        "color": 5,
        "width": 272,
        "content": "### 3. Send Scraper API Request (Decodo)  \nAn HTTP POST request is sent to **Decodo Scraper API**, which crawls the target Amazon page using headless JavaScript and device emulation. This ensures all product data loads as it appears to real users."
      },
      "typeVersion": 1
    },
    {
      "id": "2923973f-9b5f-4e0b-9dc1-a750a8a28ba5",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        576,
        192
      ],
      "parameters": {
        "color": 5,
        "width": 272,
        "content": "### 4. Parse HTML Response  \nThe raw HTML returned by Decodo is cleaned and normalized. Scripts, styles, and unnecessary tags are removed, leaving only the meaningful page content for analysis."
      },
      "typeVersion": 1
    },
    {
      "id": "e10b101a-d2e7-4274-b47d-95862b903513",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        896,
        -224
      ],
      "parameters": {
        "color": 5,
        "width": 288,
        "content": "### 5. Product Analyzer Agent (LLM)  \nAn AI agent processes the cleaned HTML and extracts **structured book data** (title, author, price, rating, ASIN, etc.) into JSON format. The structured output parser guarantees consistent schema."
      },
      "typeVersion": 1
    },
    {
      "id": "e7345128-92d7-40a3-862b-ed25d06309f1",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1296,
        160
      ],
      "parameters": {
        "color": 5,
        "width": 288,
        "content": "### 6. Build Book Purchase Report  \nThe extracted JSON is converted into a **human-readable purchase report**. The report includes a summary, detailed book table, top recommendations, and additional notes."
      },
      "typeVersion": 1
    },
    {
      "id": "8959f9cc-9a4b-4a8f-a5d3-7a2144780bb2",
      "name": "Sticky Note7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1616,
        -192
      ],
      "parameters": {
        "color": 5,
        "width": 352,
        "height": 128,
        "content": "### Create Book Purchase Report PDF\n- Configure Google Drive Folder  \n- Create Google Document  \n- Convert Document to PDF  "
      },
      "typeVersion": 1
    },
    {
      "id": "d8d80396-cbaf-4eae-a16e-6704c9f5fc74",
      "name": "Sticky Note8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2096,
        160
      ],
      "parameters": {
        "color": 5,
        "width": 288,
        "height": 144,
        "content": "### 10. Upload Report to Slack  \nFinally, the PDF report is uploaded to a Slack channel. This enables instant distribution to teams, ensuring everyone has access to the latest Amazon book purchase insights."
      },
      "typeVersion": 1
    },
    {
      "id": "c4d42d97-6909-49e0-9edf-7c4d65f37e0b",
      "name": "Sticky Note9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2448,
        -48
      ],
      "parameters": {
        "width": 320,
        "height": 144,
        "content": "## Sample output report from crawl data\nhttps://s3.ap-southeast-1.amazonaws.com/automatewith.me/Book+Purchase+Report+2025-09-02"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "14ac45c9-5507-4a84-8278-5e1f55d52df0",
  "connections": {
    "6e74af84-70a1-4184-8ded-0592d40f1587": {
      "main": [
        [
          {
            "node": "0b6859b7-220e-432c-be18-a0931d15108d",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "6f8cea7d-d8f3-456f-81cd-6213b95e2d5d": {
      "ai_languageModel": [
        [
          {
            "node": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "0b6859b7-220e-432c-be18-a0931d15108d": {
      "main": [
        [
          {
            "node": "4416d72c-ac02-46ca-82d7-c896d7a30c7d",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "158b0532-e82f-4044-b0a5-84b1b0ea7ffe": {
      "main": [
        [
          {
            "node": "fc816728-71ab-4e0e-948f-b49852644086",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "4416d72c-ac02-46ca-82d7-c896d7a30c7d": {
      "main": [
        [
          {
            "node": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "8d06b26e-c55b-4846-b9a9-0e974b056fd9": {
      "main": [
        [
          {
            "node": "85e63b80-bd08-4c1f-8677-d4c87274ddd4",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "fc816728-71ab-4e0e-948f-b49852644086": {
      "main": [
        [
          {
            "node": "36cc67cd-75a8-442b-be19-18c6883c0509",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "db22753b-8bc1-468c-9705-61067a63bae2": {
      "ai_outputParser": [
        [
          {
            "node": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "47b8d51c-5d4f-453c-a562-e1a3f0c94715": {
      "main": [
        [
          {
            "node": "158b0532-e82f-4044-b0a5-84b1b0ea7ffe",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "85e63b80-bd08-4c1f-8677-d4c87274ddd4": {
      "main": [
        [
          {
            "node": "47b8d51c-5d4f-453c-a562-e1a3f0c94715",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "e2a6dee1-55c0-4407-841b-6bbbfddebc86": {
      "main": [
        [
          {
            "node": "6e74af84-70a1-4184-8ded-0592d40f1587",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - AI 摘要总结, 多模态 AI

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量: 22
分类: 2
节点类型: 10
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

作者
Trung Tran

Trung Tran

@trungtran

Empowering small and medium businesses with smart automation and practical AI, no big tech team required. Youtube channel: youtube.com/@theStackExplorer

外部链接
在 n8n.io 查看

分享此工作流

分类

分类: 34