Decodo Scraper API Workflow Template (n8n Automation: Amazon Book Purchase Report)
Advanced
This is an automation workflow in the AI Summarization and Multimodal AI categories, consisting of 22 nodes. It mainly uses Set, Code, Slack, GoogleDrive, and HttpRequest nodes, and combines the Decodo scraper with GPT-4.1 mini to extract Amazon book data and generate a purchase report.
Prerequisites
- Slack Bot Token or Webhook URL
- Google Drive API credentials
- Authentication credentials for the target API (here, a Decodo Scraper API token; see the request sketch below)
- OpenAI API Key
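For orientation before importing, the HTTP call at the heart of this workflow is the "Scraper API Request" node in the JSON below. The following is a minimal standalone sketch of that same call (Node.js 18+ with built-in fetch); the endpoint, headers, and body fields mirror that node's configuration, while the DECODO_API_KEY environment variable is an assumption for illustration (in the workflow the token is supplied through the "Edit Fields" node from your Decodo dashboard).

// Minimal sketch of the Decodo Scraper API call made by the "Scraper API Request" node.
// DECODO_API_KEY is a hypothetical env var; the workflow itself reads the token
// from the "Edit Fields" node (https://dashboard.decodo.com/web-scraping-api/scraper).
async function scrapePage(targetUrl) {
  const res = await fetch('https://scraper-api.decodo.com/v2/scrape', {
    method: 'POST',
    headers: {
      'Accept': 'application/json',
      'Content-Type': 'application/json',
      'Authorization': process.env.DECODO_API_KEY,
    },
    body: JSON.stringify({ url: targetUrl, device_type: 'desktop' }),
  });
  if (!res.ok) throw new Error(`Decodo request failed: ${res.status}`);
  const data = await res.json();
  // The workflow's "HTML Response Parser" node reads the page HTML
  // from results[0].content of this response.
  return data?.results?.[0]?.content ?? '';
}

scrapePage('https://www.amazon.com/Best-Sellers-Books/zgbs/books')
  .then(html => console.log(`${html.length} characters of HTML`));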
Nodes used (22)
Export workflow
Copy the JSON configuration below and import it into n8n to use this workflow.
{
"id": "26kLwujfcYNYROSS",
"meta": {
"instanceId": "4a2e6764ba7a6bc9890d9225f4b21d570ce88fc9bd57549c89057fcee58fed0f",
"templateCredsSetupCompleted": true
},
"name": "Decodo Scraper API Workflow Template (n8n Automation Amazon Book Purchase Report)",
"tags": [
{
"id": "LAIfxZMd3ZR7hJo6",
"name": "Decodo",
"createdAt": "2025-09-02T04:14:42.812Z",
"updatedAt": "2025-09-02T04:14:42.812Z"
}
],
"nodes": [
{
"id": "e2a6dee1-55c0-4407-841b-6bbbfddebc86",
"name": "When clicking ‘Execute workflow’",
"type": "n8n-nodes-base.manualTrigger",
"position": [
0,
-16
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6f8cea7d-d8f3-456f-81cd-6213b95e2d5d",
"name": "OpenAI Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
928,
208
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "PPSwAKeLQYgAPobT",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "db22753b-8bc1-468c-9705-61067a63bae2",
"name": "Structured Output Parser",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
1056,
208
],
"parameters": {
"jsonSchemaExample": "[{\n \"asin\": \"0399501487\",\n \"title\": \"Lord of the Flies\",\n \"author\": \"William Golding\",\n \"rank\": 50,\n \"category\": \"Literature & Fiction\",\n \"sub_category\": \"Classics\",\n \"rating\": 4.6,\n \"ratings_count\": 25600,\n \"price\": {\n \"currency\": \"USD\",\n \"amount\": 9.99,\n \"format\": \"Paperback\"\n },\n \"url\": \"https://www.amazon.com/dp/0399501487\",\n \"publisher\": \"Penguin\",\n \"publication_date\": \"1959-04-15\",\n \"language\": \"English\",\n \"pages\": 224\n}\n]"
},
"typeVersion": 1.3
},
{
"id": "158b0532-e82f-4044-b0a5-84b1b0ea7ffe",
"name": "Create document file",
"type": "n8n-nodes-base.httpRequest",
"position": [
1744,
-16
],
"parameters": {
"url": "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart&supportsAllDrives=true",
"body": "=--foo_bar_baz\nContent-Type: application/json; charset=UTF-8\n\n{\n \"name\": \"{{ $json.Today }}\",\n \"mimeType\": \"application/vnd.google-apps.document\",\n \"parents\": [\"{{ $json['Drive Folder ID'] }}\"]\n}\n\n--foo_bar_baz\nContent-Type: text/markdown; charset=UTF-8\n\n{{ $('Build 📚 Book Purchase Report').item.json.markdown }}\n\n--foo_bar_baz--",
"method": "POST",
"options": {},
"sendBody": true,
"sendQuery": true,
"contentType": "raw",
"sendHeaders": true,
"authentication": "predefinedCredentialType",
"rawContentType": "multipart/related; boundary=foo_bar_baz",
"queryParameters": {
"parameters": [
{
"name": "uploadType",
"value": "multipart"
},
{
"name": "supportsAllDrives",
"value": "true"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "boundary",
"value": "foo_bar_baz"
}
]
},
"nodeCredentialType": "googleDriveOAuth2Api"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "fC471es5gk5Mm900",
"name": "Google Drive account"
}
},
"typeVersion": 4.2
},
{
"id": "fc816728-71ab-4e0e-948f-b49852644086",
"name": "Convert document to PDF",
"type": "n8n-nodes-base.googleDrive",
"position": [
1968,
-16
],
"parameters": {
"fileId": {
"__rl": true,
"mode": "id",
"value": "={{ $json.id }}"
},
"options": {
"googleFileConversion": {
"conversion": {
"docsToFormat": "application/pdf"
}
}
},
"operation": "download"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "fC471es5gk5Mm900",
"name": "Google Drive account"
}
},
"typeVersion": 3
},
{
"id": "47b8d51c-5d4f-453c-a562-e1a3f0c94715",
"name": "Configure Google Drive Folder ",
"type": "n8n-nodes-base.set",
"position": [
1520,
-16
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "1ff0b9a4-7d60-44ec-b047-e49252f1ace9",
"name": "Drive Folder ID",
"type": "string",
"value": "1IPcko8bzogO3W4mxhrW2Q017QA0Lc5MI"
},
{
"id": "d64a1ac4-15db-4c84-a1db-fbd6b48084f5",
"name": "Today",
"type": "string",
"value": "={{ $now.format(\"ddMMyyyyhhmmss\") }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
"name": "Product Analyzer Agent",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
896,
-16
],
"parameters": {
"text": "=Get top 10 best selling book from the below web content:\n{{ $json.text }}",
"options": {
"systemMessage": "You are a helpful assistant to parse the HTML content and output as well-structure JSON"
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 2.1
},
{
"id": "0b6859b7-220e-432c-be18-a0931d15108d",
"name": "Scraper API Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
448,
-16
],
"parameters": {
"url": "https://scraper-api.decodo.com/v2/scrape",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "device_type",
"value": "desktop"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Accept",
"value": "application/json"
},
{
"name": "Authorization",
"value": "={{ $json.Authenticate_Token }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "6e74af84-70a1-4184-8ded-0592d40f1587",
"name": "Edit Fields",
"type": "n8n-nodes-base.set",
"position": [
224,
-16
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "391aaecd-88c0-4943-9417-2d9fc6bc50b9",
"name": "Authenticate_Token",
"type": "string",
"value": "Get token from your Decodo dashboard (https://dashboard.decodo.com/web-scraping-api/scraper)"
},
{
"id": "859e5162-ef18-454a-9819-c1b0f2800b3f",
"name": "url",
"type": "string",
"value": "https://www.amazon.com/Best-Sellers-Books/zgbs/books"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "4416d72c-ac02-46ca-82d7-c896d7a30c7d",
"name": "HTML Response Parser",
"type": "n8n-nodes-base.code",
"position": [
672,
-16
],
"parameters": {
"jsCode": "// n8n Code node (JavaScript)\n// Input: $input.first().json.results[0].content\n// Output: clean plain text (no HTML/JS/CSS, minimal \\n)\n\nfunction stripAll(html) {\n if (typeof html !== 'string') return '';\n\n // Remove scripts, styles, head, comments, svg, noscript, canvas\n html = html.replace(/<script[\\s\\S]*?<\\/script>/gi, '');\n html = html.replace(/<style[\\s\\S]*?<\\/style>/gi, '');\n html = html.replace(/<head[\\s\\S]*?<\\/head>/gi, '');\n html = html.replace(/<noscript[\\s\\S]*?<\\/noscript>/gi, '');\n html = html.replace(/<svg[\\s\\S]*?<\\/svg>/gi, '');\n html = html.replace(/<canvas[\\s\\S]*?<\\/canvas>/gi, '');\n html = html.replace(/<!--[\\s\\S]*?-->/g, '');\n\n // Replace block-level tags with a single newline\n const blockTags = [\n 'p','div','section','article','header','footer','nav','aside','main',\n 'h1','h2','h3','h4','h5','h6','ul','ol','li','table','tr','td','th','br','hr'\n ];\n for (const tag of blockTags) {\n const rxOpen = new RegExp(`<${tag}[^>]*>`, 'gi');\n const rxClose = new RegExp(`</${tag}>`, 'gi');\n html = html.replace(rxOpen, '\\n');\n html = html.replace(rxClose, '\\n');\n }\n\n // Strip all remaining tags\n let text = html.replace(/<\\/?[^>]+>/g, '');\n\n // Decode common HTML entities\n text = text\n .replace(/ /g, ' ')\n .replace(/&/g, '&')\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/"/g, '\"')\n .replace(/'/g, \"'\");\n\n // Clean whitespace\n text = text\n .replace(/\\r/g, '')\n .replace(/[ \\t]+/g, ' ') // collapse spaces/tabs\n .replace(/\\n[ \\t]+/g,'') // trim spaces after newlines\n .replace(/\\n{3,}/g, '') // collapse 3+ newlines into 2\n .trim();\n \n return text;\n}\n\n// MAIN\nconst html = $input.first().json?.results?.[0]?.content || '';\nif (!html) {\n return [{ json: { error: 'No HTML found at json.results[0].content' } }];\n}\n\nconst text = stripAll(html);\n\nreturn [{\n json: {\n text,\n chars: text.length\n }\n}];"
},
"typeVersion": 2
},
{
"id": "36cc67cd-75a8-442b-be19-18c6883c0509",
"name": "Upload report to Slack ",
"type": "n8n-nodes-base.slack",
"position": [
2192,
-16
],
"webhookId": "4f70c69a-8b0e-4446-973c-f14e6479fcae",
"parameters": {
"options": {
"fileName": "=Book Purchase Report {{ $today.format('yyyy-MM-dd') }}",
"channelId": "C0989EJ7Z6K",
"initialComment": "📚 Book Purchase Report"
},
"resource": "file",
"authentication": "oAuth2"
},
"credentials": {
"slackOAuth2Api": {
"id": "4JSKt9sIRV1KGswQ",
"name": "Slack account"
}
},
"typeVersion": 2.3
},
{
"id": "85e63b80-bd08-4c1f-8677-d4c87274ddd4",
"name": "Build 📚 Book Purchase Report",
"type": "n8n-nodes-base.code",
"position": [
1296,
-16
],
"parameters": {
"jsCode": "// n8n Code node (JavaScript)\n// Input shape expected:\n// items[0].json.output = [ { title, author, rank, category, sub_category, rating, ratings_count, price:{currency, amount, format}, ... }, ... ]\n\nfunction median(nums) {\n if (!nums.length) return 0;\n const arr = [...nums].sort((a, b) => a - b);\n const mid = Math.floor(arr.length / 2);\n return arr.length % 2 ? arr[mid] : (arr[mid - 1] + arr[mid]) / 2;\n}\n\nfunction sum(nums) {\n return nums.reduce((a, b) => a + (Number.isFinite(b) ? b : 0), 0);\n}\n\nfunction mean(nums) {\n const valid = nums.filter(n => Number.isFinite(n));\n return valid.length ? sum(valid) / valid.length : 0;\n}\n\nfunction fmtMoney(n, currency = \"USD\") {\n if (!Number.isFinite(n)) n = 0;\n try {\n return new Intl.NumberFormat(\"en-US\", { style: \"currency\", currency }).format(n);\n } catch {\n return `$${n.toFixed(2)}`;\n }\n}\n\nfunction pad(str, len) {\n return (str + '').padEnd(len, ' ');\n}\n\nfunction repeat(s, n) {\n return Array.from({ length: n }, () => s).join('');\n}\n\nfunction safe(v, d='') { return (v === null || v === undefined) ? d : v; }\n\nfunction asBar(value, max, width = 20) {\n if (max <= 0) return '';\n const filled = Math.round((value / max) * width);\n return repeat('▉', filled);\n}\n\nconst now = new Date();\nconst dateStr = now.toLocaleString('en-US', { year:'numeric', month:'short', day:'2-digit' });\n\nconst inp = $input.all();\nconst books = (inp?.[0]?.json?.output && Array.isArray(inp[0].json.output))\n ? inp[0].json.output\n : Array.isArray(inp?.[0]?.json) ? inp[0].json : [];\n\nconst clean = books.map(b => ({\n title: safe(b.title, 'N/A'),\n author: safe(b.author, 'Unknown'),\n rank: Number.isFinite(b.rank) ? b.rank : null,\n category: safe(b.category, 'N/A'),\n sub_category: safe(b.sub_category, 'N/A'),\n rating: Number.isFinite(b.rating) ? b.rating : 0,\n ratings_count: Number.isFinite(b.ratings_count) ? b.ratings_count : 0,\n price_amount: Number.isFinite(b?.price?.amount) ? b.price.amount : 0,\n price_currency: safe(b?.price?.currency, 'USD'),\n price_format: safe(b?.price?.format, 'Unknown'),\n}));\n\n// --- Core metrics ---\nconst totalBooks = clean.length;\nconst currency = clean.find(b => b.price_currency)?.price_currency || 'USD';\nconst prices = clean.map(b => b.price_amount).filter(n => Number.isFinite(n) && n >= 0);\nconst totalSpend = sum(prices);\nconst avgPrice = mean(prices);\nconst medPrice = median(prices);\nconst minPrice = prices.length ? Math.min(...prices) : 0;\nconst maxPrice = prices.length ? Math.max(...prices) : 0;\n\nconst rated = clean.filter(b => b.rating > 0);\nconst avgRating = rated.length ? 
mean(rated.map(b => b.rating)) : 0;\nconst ratingCoverage = rated.length;\nconst unratedCount = totalBooks - ratingCoverage;\n\n// category/subcategory counts\nconst catCount = {};\nconst subCatCount = {};\nfor (const b of clean) {\n catCount[b.category] = (catCount[b.category] || 0) + 1;\n subCatCount[b.sub_category] = (subCatCount[b.sub_category] || 0) + 1;\n}\n\n// formats\nconst formatCount = {};\nfor (const b of clean) {\n formatCount[b.price_format] = (formatCount[b.price_format] || 0) + 1;\n}\n\n// top by popularity (ratings_count) and by rating (>=50 ratings)\nconst topByRatingsCount = [...clean].sort((a,b) => b.ratings_count - a.ratings_count).slice(0,5);\nconst topByRating = [...clean]\n .filter(b => b.ratings_count >= 50 && b.rating > 0)\n .sort((a,b) => b.rating - a.rating)\n .slice(0,5);\n\n// price histogram (simple buckets)\nconst bucketSize = 5; // $5 buckets\nconst buckets = {};\nfor (const p of prices) {\n const b = Math.floor(p / bucketSize) * bucketSize;\n const label = `${fmtMoney(b, currency)}–${fmtMoney(b + bucketSize - 0.01, currency)}`;\n buckets[label] = (buckets[label] || 0) + 1;\n}\nconst bucketEntries = Object.entries(buckets).sort((a,b) => {\n // sort by numeric lower bound\n const n = s => Number(s[0].replace(/[^0-9.]/g, ''));\n return n(a) - n(b);\n});\nconst maxBucket = bucketEntries.length ? Math.max(...bucketEntries.map(([_, v]) => v)) : 0;\n\n// category table (top 5)\nconst topCats = Object.entries(catCount).sort((a,b) => b[1]-a[1]).slice(0,5);\nconst topSubCats = Object.entries(subCatCount).sort((a,b) => b[1]-a[1]).slice(0,5);\nconst topFormats = Object.entries(formatCount).sort((a,b) => b[1]-a[1]).slice(0,5);\n\n// build tables\nfunction makeKpiRow(label, value) {\n return `| ${label} | ${value} |`;\n}\n\nfunction tableHeader(cols) {\n return `| ${cols.join(' | ')} |\\n| ${cols.map(()=>'---').join(' | ')} |`;\n}\n\nfunction bookRow(b) {\n const title = b.title.length > 72 ? b.title.slice(0,69) + '…' : b.title;\n return `| ${safe(b.rank,'–')} | ${title} | ${b.author} | ${fmtMoney(b.price_amount, b.price_currency)} | ${b.rating || '–'} | ${b.ratings_count} |`;\n}\n\nconst kpiTable = [\n tableHeader(['Metric','Value']),\n makeKpiRow('Total Books', totalBooks),\n makeKpiRow('Total Spend (list prices)', fmtMoney(totalSpend, currency)),\n makeKpiRow('Avg Price', fmtMoney(avgPrice, currency)),\n makeKpiRow('Median Price', fmtMoney(medPrice, currency)),\n makeKpiRow('Price Range', `${fmtMoney(minPrice, currency)} – ${fmtMoney(maxPrice, currency)}`),\n makeKpiRow('Avg Rating (rated only)', rated.length ? 
avgRating.toFixed(2) : '–'),\n makeKpiRow('Rated Titles', `${ratingCoverage}/${totalBooks}`),\n].join('\\n');\n\nconst popularityTable = [\n tableHeader(['Rank','Title','Author','Price','Rating','#Ratings']),\n ...topByRatingsCount.map(bookRow),\n].join('\\n');\n\nconst qualityTable = [\n tableHeader(['Rank','Title','Author','Price','Rating','#Ratings']),\n ...topByRating.map(bookRow),\n].join('\\n');\n\nfunction kvTable(title, entries) {\n const rows = entries.map(([k,v]) => `| ${k} | ${v} |`).join('\\n');\n return `**${title}**\\n\\n${tableHeader(['Key','Count'])}\\n${rows}`;\n}\n\nconst catTable = kvTable('Top Categories', topCats);\nconst subCatTable = kvTable('Top Sub-Categories', topSubCats);\nconst fmtTable = kvTable('Formats', topFormats);\n\n// histogram block\nconst histLines = bucketEntries.map(([label, count]) => {\n const bar = asBar(count, maxBucket, 24);\n return `- ${pad(label, 22)} | ${pad(count, 3)} ${bar}`;\n}).join('\\n');\n\nconst highlights = [];\nif (topByRatingsCount[0]) {\n highlights.push(`**Most talked-about**: _${topByRatingsCount[0].title}_ (${topByRatingsCount[0].ratings_count.toLocaleString()} ratings).`);\n}\nif (topByRating[0]) {\n highlights.push(`**Highest rated (≥50 ratings)**: _${topByRating[0].title}_ (${topByRating[0].rating.toFixed(1)}★).`);\n}\nif (topFormats[0]) {\n highlights.push(`**Preferred format**: ${topFormats[0][0]} (${topFormats[0][1]} titles).`);\n}\nif (topCats[0]) {\n highlights.push(`**Top category**: ${topCats[0][0]} (${topCats[0][1]} titles).`);\n}\n\nconst recs = [];\nif (unratedCount > 0) {\n recs.push(`Fill in missing ratings for ${unratedCount} titles to improve quality insights.`);\n}\nif (prices.some(p => p === 0)) {\n const zeroPrices = prices.filter(p => p === 0).length;\n recs.push(`Assign prices to ${zeroPrices} title(s) with \\$0 to avoid skewing spend/price stats.`);\n}\nif (clean.some(b => !b.rank)) {\n recs.push(`Confirm ranks for all titles to refine top-N comparisons.`);\n}\n\nconst markdown = `\n# 📚 Book Purchase Report\n**Date:** ${dateStr}\n\n> A quick, readable snapshot of your current book list with pricing, ratings, and category insights.\n\n---\n\n## 🔎 Executive KPIs\n${kpiTable}\n\n---\n\n## ⭐ Most Popular (by #Ratings)\n${popularityTable}\n\n---\n\n## 🏆 Highest Rated (≥ 50 ratings)\n${qualityTable}\n\n---\n\n## 🧩 Breakdown\n${catTable}\n\n${subCatTable}\n\n${fmtTable}\n\n---\n\n## 💲 Price Distribution (bucket = \\$${bucketSize})\n\\`\\`\\`\n${histLines || 'No price data available.'}\n\\`\\`\\`\n\n---\n\n## ✨ Highlights\n${highlights.length ? highlights.map(x => `- ${x}`).join('\\n') : '- No highlights available yet.'}\n\n---\n\n## ✅ Suggestions\n${(recs.length ? recs : ['All good!']).map(x => `- ${x}`).join('\\n')}\n`.trim();\n\n// Also expose structured stats if you want to reuse downstream\nconst stats = {\n totalBooks,\n currency,\n price: {\n totalSpend,\n avgPrice,\n medianPrice: medPrice,\n minPrice,\n maxPrice,\n },\n ratings: {\n avgRating: rated.length ? Number(avgRating.toFixed(3)) : null,\n ratedCount: ratingCoverage,\n unratedCount,\n },\n counts: {\n byCategory: catCount,\n bySubCategory: subCatCount,\n byFormat: formatCount,\n },\n};\n\nreturn [\n {\n json: {\n markdown,\n stats,\n }\n }\n];"
},
"typeVersion": 2
},
{
"id": "545fa265-c575-4b59-9919-6556e27b19c3",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1152,
-752
],
"parameters": {
"width": 1040,
"height": 2672,
"content": "# Decodo Scraper API Workflow Template (n8n Automation Amazon Book Purchase Report)\n\n> This workflow demos how to use **Decodo Scraper API** to crawl any public web page (headless JS, device emulation: mobile/desktop/tablet), extract structured product data from the returned HTML, generate a **purchase-ready report**, and automatically deliver it as a **Google Doc + PDF** to Slack/Drive.\n## Who’s it for\n- **Creators / Analysts** who need quick product lists (books, gadgets, etc.) with prices/ratings.\n- **Ops & Marketing teams** building weekly “top picks” reports.\n- **Engineers** validating the Decodo Scraper API + LLM extraction pattern before scaling.\n\n## How it works / What it does\n\n1. **Trigger** – Manually run the workflow.\n2. **Edit Fields (manual)** – Provide inputs:\n - `targetUrl` (e.g., an Amazon category/search/listing page)\n - `deviceType` (`desktop` | `mobile` | `tablet`)\n - Optional: `maxItems`, `notes`, `reportTitle`, `reportOwner`\n3. **Scraper API Request (HTTP Request → POST)** \n Calls **Decodo Scraper API** with:\n - URL to crawl, **headless JS** enabled\n - **Device emulation** (UA + viewport)\n - Optional **waitFor / executeJS** to ensure late-loading content is captured\n4. **HTML Response Parser (Code/Function or HTML node)** \n Pulls the HTML string from Decodo response and normalizes it (strip scripts/styles, collapse whitespace).\n5. **Product Analyzer Agent (LLM + Structured Output Parser)** \n Prompts an LLM to extract **structured “book” objects** from the HTML:\n The **Structured Output Parser** enforces a strict JSON schema and drops malformed items.\n6. **Build 📚 Book Purchase Report (Code/LLM)** \n Converts the JSON array into a **Markdown** (or HTML) report with:\n - Executive summary (top picks, average price/rating)\n - Table of items (rank, title, author, price, rating, link)\n - “Recommended to buy” shortlist (rules configurable)\n - Notes / owner / timestamp\n7. **Configure Google Drive Folder (manual)** \n Choose/create a Drive folder for output artifacts.\n8. **Create Document File (Google Docs API)** \n Creates a Doc from the generated Markdown/HTML.\n9. **Convert Document to PDF (Google Drive export)** \n Exports the Doc to PDF.\n10. **Upload report to Slack** \n Sends the PDF (and/or Doc link) to a chosen Slack channel with a short summary.\n\n## How to set up\n\n### 1 Prerequisites\n- **n8n** (self-hosted or Cloud)\n- **Decodo Scraper API** key\n- **OpenAI (or compatible) API key** for the Analyzer Agent\n- **Google Drive/Docs** credentials (OAuth2)\n- **Slack** Bot/User token (files:write, chat:write)\n\n### 2 Environment variables (recommended)\n- `DECODO_API_KEY`\n- `OPENAI_API_KEY`\n- `DRIVE_FOLDER_ID` (optional default)\n- `SLACK_CHANNEL_ID`\n\n### 3 Nodes configuration (high level)\n**Edit Fields (Set node)**\n**Scraper API Request (HTTP Request → POST)**\n**HTML Response Parser (Code node)**\n**Product Analyzer Agent**\n**Build Book Purchase Report (Code/LLM)**\n**Create Document File**\n**Convert to PDF**\n**Upload to Slack**\n\n## Requirements\n\n- **Decodo**: Active API key and endpoint access. Be mindful of concurrency/rate limits.\n- **Model**: GPT-4o/4.1-mini or similar for reliable structured extraction.\n- **Google**: OAuth client (Docs/Drive scopes). Ensure n8n can write to the target folder.\n- **Slack**: Bot token with `files:write` + `chat:write`.\n\n## How to customize the workflow\n\n- **Target site**: Change `targetUrl` to any **public** page (category, search, or listing). 
\n For other domains (not Amazon), tweak the **LLM guidance** (e.g., price/label patterns).\n- **Device emulation**: Switch `deviceType` to `mobile` to fetch mobile-optimized markup (often simpler DOMs).\n- **Late-loading pages**: Adjust `waitFor.selector` or use `waitUntil: \"networkidle\"` (if supported) to ensure full content loads.\n- **Client-side JS**: Extend `executeJS` if you need to interact (scroll, click “next”, expand sections). You can also loop over pagination by iterating URLs.\n- **Extraction schema**: Add fields (e.g., `discount_percent`, `bestseller_badge`, `prime_eligible`) and update the Structured Output schema accordingly.\n- **Filtering rules**: Modify recommendation logic (e.g., min ratings count, price bands, languages).\n- **Report branding**: Add logo, cover page, footer with company info; switch to HTML + inline CSS for richer Docs formatting.\n- **Destinations**: Besides Slack & Drive, add Email, Notion, Confluence, or a database sink.\n- **Scheduling**: Add a **Cron** trigger for weekly/monthly auto-reports."
},
"typeVersion": 1
},
{
"id": "1fa69470-0a75-45b4-8701-58c384d35a42",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-32,
-208
],
"parameters": {
"color": 5,
"width": 272,
"content": "### 1. Trigger Workflow Execution \nThe workflow starts manually by clicking **Execute workflow**. This allows users to control when the Amazon book data scraping and report generation begins.\n"
},
"typeVersion": 1
},
{
"id": "bb34c2ae-5133-4465-8955-87804b5a1ce8",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
128,
176
],
"parameters": {
"color": 5,
"width": 272,
"content": "### 2. Edit Input Fields \nSet the required fields such as `targetUrl` (Amazon book listing page), `deviceType` (desktop or mobile), and report details (title, owner, notes). These values define the scope and context of the report."
},
"typeVersion": 1
},
{
"id": "5010d9c6-502b-4b71-b43a-e81b9f9445f9",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
352,
-208
],
"parameters": {
"color": 5,
"width": 272,
"content": "### 3. Send Scraper API Request (Decodo) \nAn HTTP POST request is sent to **Decodo Scraper API**, which crawls the target Amazon page using headless JavaScript and device emulation. This ensures all product data loads as it appears to real users."
},
"typeVersion": 1
},
{
"id": "2923973f-9b5f-4e0b-9dc1-a750a8a28ba5",
"name": "Sticky Note4",
"type": "n8n-nodes-base.stickyNote",
"position": [
576,
192
],
"parameters": {
"color": 5,
"width": 272,
"content": "### 4. Parse HTML Response \nThe raw HTML returned by Decodo is cleaned and normalized. Scripts, styles, and unnecessary tags are removed, leaving only the meaningful page content for analysis."
},
"typeVersion": 1
},
{
"id": "e10b101a-d2e7-4274-b47d-95862b903513",
"name": "Sticky Note5",
"type": "n8n-nodes-base.stickyNote",
"position": [
896,
-224
],
"parameters": {
"color": 5,
"width": 288,
"content": "### 5. Product Analyzer Agent (LLM) \nAn AI agent processes the cleaned HTML and extracts **structured book data** (title, author, price, rating, ASIN, etc.) into JSON format. The structured output parser guarantees consistent schema."
},
"typeVersion": 1
},
{
"id": "e7345128-92d7-40a3-862b-ed25d06309f1",
"name": "Sticky Note6",
"type": "n8n-nodes-base.stickyNote",
"position": [
1296,
160
],
"parameters": {
"color": 5,
"width": 288,
"content": "### 6. Build Book Purchase Report \nThe extracted JSON is converted into a **human-readable purchase report**. The report includes a summary, detailed book table, top recommendations, and additional notes."
},
"typeVersion": 1
},
{
"id": "8959f9cc-9a4b-4a8f-a5d3-7a2144780bb2",
"name": "Sticky Note7",
"type": "n8n-nodes-base.stickyNote",
"position": [
1616,
-192
],
"parameters": {
"color": 5,
"width": 352,
"height": 128,
"content": "### Create ceport Book Purchase Report PDF\n- Configure Google Drive Folder \n- Create Google Document \n- Convert Document to PDF "
},
"typeVersion": 1
},
{
"id": "d8d80396-cbaf-4eae-a16e-6704c9f5fc74",
"name": "Sticky Note8",
"type": "n8n-nodes-base.stickyNote",
"position": [
2096,
160
],
"parameters": {
"color": 5,
"width": 288,
"height": 144,
"content": "### 10. Upload Report to Slack \nFinally, the PDF report is uploaded to a Slack channel. This enables instant distribution to teams, ensuring everyone has access to the latest Amazon book purchase insights."
},
"typeVersion": 1
},
{
"id": "c4d42d97-6909-49e0-9edf-7c4d65f37e0b",
"name": "Sticky Note9",
"type": "n8n-nodes-base.stickyNote",
"position": [
2448,
-48
],
"parameters": {
"width": 320,
"height": 144,
"content": "## Sample output report from crawl data\nhttps://s3.ap-southeast-1.amazonaws.com/automatewith.me/Book+Purchase+Report+2025-09-02"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "14ac45c9-5507-4a84-8278-5e1f55d52df0",
"connections": {
"6e74af84-70a1-4184-8ded-0592d40f1587": {
"main": [
[
{
"node": "0b6859b7-220e-432c-be18-a0931d15108d",
"type": "main",
"index": 0
}
]
]
},
"6f8cea7d-d8f3-456f-81cd-6213b95e2d5d": {
"ai_languageModel": [
[
{
"node": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"0b6859b7-220e-432c-be18-a0931d15108d": {
"main": [
[
{
"node": "4416d72c-ac02-46ca-82d7-c896d7a30c7d",
"type": "main",
"index": 0
}
]
]
},
"158b0532-e82f-4044-b0a5-84b1b0ea7ffe": {
"main": [
[
{
"node": "fc816728-71ab-4e0e-948f-b49852644086",
"type": "main",
"index": 0
}
]
]
},
"4416d72c-ac02-46ca-82d7-c896d7a30c7d": {
"main": [
[
{
"node": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
"type": "main",
"index": 0
}
]
]
},
"8d06b26e-c55b-4846-b9a9-0e974b056fd9": {
"main": [
[
{
"node": "85e63b80-bd08-4c1f-8677-d4c87274ddd4",
"type": "main",
"index": 0
}
]
]
},
"fc816728-71ab-4e0e-948f-b49852644086": {
"main": [
[
{
"node": "36cc67cd-75a8-442b-be19-18c6883c0509",
"type": "main",
"index": 0
}
]
]
},
"db22753b-8bc1-468c-9705-61067a63bae2": {
"ai_outputParser": [
[
{
"node": "8d06b26e-c55b-4846-b9a9-0e974b056fd9",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"47b8d51c-5d4f-453c-a562-e1a3f0c94715": {
"main": [
[
{
"node": "158b0532-e82f-4044-b0a5-84b1b0ea7ffe",
"type": "main",
"index": 0
}
]
]
},
"85e63b80-bd08-4c1f-8677-d4c87274ddd4": {
"main": [
[
{
"node": "47b8d51c-5d4f-453c-a562-e1a3f0c94715",
"type": "main",
"index": 0
}
]
]
},
"e2a6dee1-55c0-4407-841b-6bbbfddebc86": {
"main": [
[
{
"node": "6e74af84-70a1-4184-8ded-0592d40f1587",
"type": "main",
"index": 0
}
]
]
}
}
}
FAQ
How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, choose "Import from JSON", paste the configuration, and adjust the credential settings as needed.
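If you prefer to script the import rather than paste it into the editor, a hedged alternative is n8n's public REST API (available once an API key has been created in your instance's settings). A minimal sketch, assuming the export has been saved as workflow.json; the create endpoint may reject read-only properties such as id, meta, tags, and versionId, so only the core workflow fields are sent:

// Hedged sketch: programmatic import via the n8n public REST API.
// The host, the workflow.json file name, and the N8N_API_KEY env var
// are assumptions for illustration.
const fs = require('fs');

const full = JSON.parse(fs.readFileSync('workflow.json', 'utf8'));
const payload = {
  name: full.name,
  nodes: full.nodes,
  connections: full.connections,
  settings: full.settings,
};

fetch('http://localhost:5678/api/v1/workflows', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'X-N8N-API-KEY': process.env.N8N_API_KEY,
  },
  body: JSON.stringify(payload),
})
  .then(res => res.json())
  .then(wf => console.log('Created workflow', wf.id));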
What scenarios is this workflow suited for?
Advanced - AI Summarization, Multimodal AI
Is payment required?
The workflow itself is free to import and use. Note, however, that third-party services it relies on (such as the OpenAI API) may charge you separately.
Related workflows
- AWS Automated SSL/TLS Certificate Expiry Report: generate SSL/TLS certificate expiry reports for Slack and email with AWS ACM and AI (Set, Code, Slack; 23 nodes; Trung Tran; AI Summarization)
- Build an AI-Powered Chatbot for Candidate Evaluation in Slack: AI resume analysis and candidate evaluation with Slack and Google Sheets integration (If, Code, Slack; 29 nodes; Trung Tran; AI Chatbot)
- 🎧 IT Voice Support Automation Bot – Telegram Voice Messages to JIRA Tickets (OpenAI Whisper): automatically turn Telegram voice messages into JIRA tickets with Whisper and GPT-4.1 Mini (If, Set, Code; 25 nodes; Trung Tran; Ticket Management)
- Free Starter Template for Multi-Agent Architecture: collaborative sales planning with multi-agent AI, Google Docs, and Slack (Set, Slack, Google Drive; 24 nodes; Trung Tran; Document Extraction)
- Free PDF Generator in n8n – No External Libraries or Paid Services: generate and share professional PDFs with OpenAI, Google Docs, and Slack (Set, Slack, Google Drive; 20 nodes; Trung Tran; Document Extraction)
- HireMind – AI-Powered Resume Intelligence Pipeline: HR AI resume screening and evaluation with GPT-4 and Google Workspace (If, Code, Slack; 26 nodes; Trung Tran; Human Resources)
Workflow information
Difficulty level: Advanced
Nodes: 22
Categories: 2
Node types: 10
Author: Trung Tran (@trungtran)
Empowering small and medium businesses with smart automation and practical AI, no big tech team required. YouTube channel: youtube.com/@theStackExplorer
External links
View on n8n.io →