Scrape.do, GPT-4 및 Google Sheets를 사용하여 아마존 제품 데이터 추출
이것은 Market Research, AI Summarization 분야의 자동화 워크플로우로, 11개의 노드를 포함합니다. 주로 Html, SplitOut, HttpRequest, GoogleSheets, ManualTrigger 등의 노드를 사용하며, Scrape.do, GPT-4, Google Sheets를 사용하여 아마존 제품 데이터를 추출합니다.
- 대상 API의 인증 정보가 필요할 수 있음
- Google Sheets API 인증 정보
- OpenAI API Key
사용된 노드 (11)
{
"meta": {
"instanceId": "cb5caf45c9475b848c7e83772505bb02340e165acdd8de77e25011192306257c",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "c499851d-09d6-4a25-812e-c1d3efa3f0a8",
"name": "워크플로우 테스트 클릭 시",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1648,
272
],
"parameters": {},
"typeVersion": 1
},
{
"id": "80562cea-7422-44ec-9886-1928bb8f81f1",
"name": "OpenAI Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-624,
336
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4o-mini"
},
"options": {
"maxTokens": 500,
"temperature": 0,
"responseFormat": "json_object"
}
},
"typeVersion": 1.2
},
{
"id": "da77ba7c-a40c-4d79-91f1-fd485d101f76",
"name": "Structured Output Parser",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
-288,
304
],
"parameters": {
"schemaType": "manual",
"inputSchema": "{\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \n \"type\": \"string\", \n \"description\": \"Product name/title\" \n },\n \"description\": { \n \"type\": \"string\", \n \"description\": \"Product description or key features\" \n },\n \"rating\": { \n \"type\": [\"number\", \"null\"], \n \"description\": \"Average rating (e.g., 4.5)\" \n },\n \"reviews\": { \n \"type\": [\"integer\", \"null\"], \n \"description\": \"Number of reviews\" \n },\n \"price\": { \n \"type\": [\"string\", \"null\"], \n \"description\": \"Product price with currency\" \n }\n },\n \"required\": [\"name\"]\n}"
},
"typeVersion": 1.3
},
{
"id": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
"name": "1. Google Sheets에서 제품 URL 가져오기",
"type": "n8n-nodes-base.googleSheets",
"position": [
-1392,
272
],
"parameters": {
"options": {},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
"cachedResultName": "Amazon Product List"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "df8r9D022KIAOHTC",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
"name": "2. 각 URL을 순회 처리",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1168,
272
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "c588ede7-1689-492d-a863-949ade5ffe33",
"name": "3. 제품 페이지 HTML 스크레이핑",
"type": "n8n-nodes-base.httpRequest",
"position": [
-960,
128
],
"parameters": {
"url": "=https://api.scrape.do/?token={{$vars.SCRAPEDO_TOKEN}}&url={{ encodeURIComponent($json.url) }}&geoCode=us&render=false",
"options": {
"timeout": 60000,
"response": {
"response": {}
}
}
},
"typeVersion": 4.2
},
{
"id": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
"name": "4. 원시 데이터 요소 추출",
"type": "n8n-nodes-base.html",
"position": [
-752,
128
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "productTitle",
"cssSelector": "#productTitle, h1[data-automation-id=\"product-title\"], .product-title"
},
{
"key": "price",
"cssSelector": ".a-price .a-offscreen, .a-price-whole, .a-price-fraction, .priceToPay .a-price .a-offscreen"
},
{
"key": "rating",
"cssSelector": ".a-icon-alt, [data-hook=\"average-star-rating\"], .a-star-medium .a-icon-alt"
},
{
"key": "reviewCount",
"cssSelector": "[data-hook=\"total-review-count\"], .a-link-normal[href*=\"customerReviews\"], #acrCustomerReviewText"
},
{
"key": "featureBullets",
"cssSelector": "#feature-bullets ul, .a-unordered-list.a-nostyle.a-vertical.feature"
},
{
"key": "productDescription",
"cssSelector": "#productDescription, [data-feature-name=\"productDescription\"], .product-description"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "2c491fda-9510-46f9-973a-754587601b7c",
"name": "5. AI로 데이터 정리 및 구조화",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
-512,
128
],
"parameters": {
"text": "={{ JSON.stringify($json, null, 2) }}",
"batching": {},
"messages": {
"messageValues": [
{
"message": "Extract Amazon product data and return ONLY valid JSON.\n\nInput: {{ $json }}\n\nExtract:\n- name: product title from productTitle\n- description: create from featureBullets OR productDescription (max 150 chars, if empty use \"No description\")\n- rating: extract number from rating (e.g. \"4.5 out of 5\" → 4.5, if no rating use null)\n- reviews: extract number from reviewCount (e.g. \"1,234 ratings\" → 1234, if none use null)\n- price: format price from price field (add $ if missing, if no price use null)\n\nReturn exact JSON:\n{\n \"name\": \"product title here\",\n \"description\": \"description here or No description\",\n \"rating\": 4.5,\n \"reviews\": 1234,\n \"price\": \"$29.99\"\n}"
}
]
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.7
},
{
"id": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
"name": "6. 최종 JSON 출력 형식 지정",
"type": "n8n-nodes-base.splitOut",
"position": [
-128,
128
],
"parameters": {
"include": "selectedOtherFields",
"options": {},
"fieldToSplitOut": "output",
"fieldsToInclude": "output.name, output.description, output.rating, output.reviews, output.price"
},
"typeVersion": 1
},
{
"id": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
"name": "7. 제품 데이터를 Google Sheets에 저장",
"type": "n8n-nodes-base.googleSheets",
"position": [
80,
272
],
"parameters": {
"columns": {
"value": {},
"schema": [],
"mappingMode": "autoMapInputData",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {
"useAppend": true
},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": 838351250,
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=838351250",
"cachedResultName": "Sheet2"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
"cachedResultName": "Amazon Product List"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "df8r9D022KIAOHTC",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "1d3b653a-e5d8-4e88-a210-15224c6282c1",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2272,
-144
],
"parameters": {
"width": 528,
"height": 896,
"content": "## Amazon Scraper with Scrape.do API\n\n### Setup Instructions:\n\n1. **Get Scrape.do API Token:**\n - Sign up at https://scrape.do\n - Get your API token from the dashboard\n\n2. **Set up Workflow Variables:**\n - SCRAPEDO_TOKEN: Your Scrape.do API token\n - WEB_SHEET_ID: Google Sheet document ID\n - TRACK_SHEET_GID: Sheet name/ID with URLs to scrape\n - RESULTS_SHEET_GID: Sheet name/ID for results\n\n3. **Google Sheets Setup:**\n - Create a Google Sheet with two tabs\n - First tab: Add Amazon product URLs in a column named 'url'\n - Second tab: Will store results (name, description, rating, reviews, price)\n - Share the sheet with your service account email\n\n4. **Credentials:**\n - Add Google Sheets OAuth2 credentials\n - Add OpenAI API credentials (used by the OpenAI Chat Model node, gpt-4o-mini)\n\n### Features:\n- Uses Scrape.do to bypass Amazon's anti-bot protection\n- Extracts product data using pattern matching and AI\n- Loops over the product URLs with Split In Batches\n- Saves structured data to Google Sheets\n\n### Scrape.do Advantages:\n- No need for complex proxy rotation\n- Automatic CAPTCHA handling\n- Better success rate than BrightData\n- Simple API integration"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"80562cea-7422-44ec-9886-1928bb8f81f1": {
"ai_languageModel": [
[
{
"node": "2c491fda-9510-46f9-973a-754587601b7c",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"41e494b5-f3e9-48dd-8c7b-0096790df02b": {
"main": [
[],
[
{
"node": "c588ede7-1689-492d-a863-949ade5ffe33",
"type": "main",
"index": 0
}
]
]
},
"da77ba7c-a40c-4d79-91f1-fd485d101f76": {
"ai_outputParser": [
[
{
"node": "2c491fda-9510-46f9-973a-754587601b7c",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"c588ede7-1689-492d-a863-949ade5ffe33": {
"main": [
[
{
"node": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
"type": "main",
"index": 0
}
]
]
},
"7796a70c-99a4-4e6e-b18a-5c63adc90871": {
"main": [
[
{
"node": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
"type": "main",
"index": 0
}
]
]
},
"c499851d-09d6-4a25-812e-c1d3efa3f0a8": {
"main": [
[
{
"node": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
"type": "main",
"index": 0
}
]
]
},
"818b6ea9-b259-4d67-bfb9-f02366da89c1": {
"main": [
[
{
"node": "2c491fda-9510-46f9-973a-754587601b7c",
"type": "main",
"index": 0
}
]
]
},
"2c491fda-9510-46f9-973a-754587601b7c": {
"main": [
[
{
"node": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
"type": "main",
"index": 0
}
]
]
},
"7c3d7a0e-4d59-41e0-bdc8-87005237d8a9": {
"main": [
[
{
"node": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
"type": "main",
"index": 0
}
]
]
},
"daf15a88-7d2f-4542-b3f0-c3658960cb22": {
"main": [
[
{
"node": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
"type": "main",
"index": 0
}
]
]
}
}
}
이 워크플로우를 어떻게 사용하나요?
위의 JSON 구성 코드를 복사하여 n8n 인스턴스에서 새 워크플로우를 생성하고 "JSON에서 가져오기"를 선택한 후, 구성을 붙여넣고 필요에 따라 인증 설정을 수정하세요.
이 워크플로우는 어떤 시나리오에 적합한가요?
중급 - 시장 조사, AI 요약
유료인가요?
이 워크플로우는 완전히 무료이며 직접 가져와 사용할 수 있습니다. 다만, 워크플로우에서 사용하는 타사 서비스(예: OpenAI API)는 사용자가 직접 비용을 지불해야 할 수 있습니다.
관련 워크플로우 추천
Onur
@onurpolat05
Hello, I'm Onur. I've been working as a freelance software developer for about four years. In addition, I develop my own projects. For some time, I have been improving myself and providing various services related to AI and AI workflows, both low-code and code-based. If you have any questions, don't hesitate to contact me.
이 워크플로우 공유