Utiliser Dumpling AI pour scraper le contenu des blogs de sites web et l'enregistrer dans Google Sheets
Ceci est un workflow d'automatisation du domaine Market Research, Multimodal AI, contenant 11 nœuds. Il utilise principalement des nœuds comme Set, Code, FormTrigger, HttpRequest, GoogleSheets. Gratter le contenu des blogs de sites web et le sauvegarder dans Google Sheets avec Dumpling AI.
- Peut nécessiter les informations d'authentification de l'API cible
- Informations d'identification de l'API Google Sheets
Nœuds utilisés (11)
Catégorie
{
"id": "VPtrkEUaljkq5VtD",
"meta": {
"instanceId": "a1ae5c8dc6c65e674f9c3947d083abcc749ef2546dff9f4ff01de4d6a36ebfe6",
"templateCredsSetupCompleted": true
},
"name": "Crawl Website Blog Content and Save to Google Sheets with Dumpling AI",
"tags": [],
"nodes": [
{
"id": "b9be7abb-d02c-4383-b18d-e40f77f833bf",
"name": "Form Submission ",
"type": "n8n-nodes-base.formTrigger",
"position": [
-112,
0
],
"webhookId": "75c29ccf-8a5d-4d8c-95ca-74a838257011",
"parameters": {
"options": {},
"formTitle": "blog content strategy",
"formFields": {
"values": [
{
"fieldLabel": "Client URL",
"requiredField": true
}
]
}
},
"typeVersion": 2.2
},
{
"id": "e4c0b49d-ec38-4357-ad7c-bb47145b0cd5",
"name": "Create Blog Audit Sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
112,
0
],
"parameters": {
"title": "={{ $json[\"Client URL\"].trim().split(/›|>|»/)[0].trim().split(\".\")[0] }}",
"options": {},
"resource": "spreadsheet",
"sheetsUi": {
"sheetValues": [
{
"title": "Blog content audit"
}
]
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "GaJqJHuS5mQxap7q",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "1138d43b-14bb-4f9c-a6dc-f82cc965e5c4",
"name": "Définir les en-têtes de la feuille",
"type": "n8n-nodes-base.set",
"position": [
336,
0
],
"parameters": {
"values": {
"string": [
{
"name": "rows",
"value": "Url,Crawled_pages,website_content"
}
]
},
"options": {}
},
"typeVersion": 1
},
{
"id": "3630f64c-a9df-4c18-a668-822b5d0aed00",
"name": "Formater la ligne d'en-tête",
"type": "n8n-nodes-base.code",
"position": [
560,
0
],
"parameters": {
"jsCode": "return [\n {\n json: {\n data: [ $json.rows.split(',') ]\n }\n }\n];\n\n"
},
"typeVersion": 2
},
{
"id": "171ecc40-447d-4a64-adc8-f0d38d1d9cdf",
"name": "Insérer les en-têtes dans la feuille",
"type": "n8n-nodes-base.httpRequest",
"position": [
784,
0
],
"parameters": {
"url": "=https://sheets.googleapis.com/v4/spreadsheets/{{ $('Create Blog Audit Sheet').first().json.spreadsheetId }}/values/{{ $('Create Blog Audit Sheet').first().json.sheets[0].properties.title }}!A:Z",
"method": "PUT",
"options": {},
"sendBody": true,
"sendQuery": true,
"authentication": "predefinedCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "range",
"value": "={{ $('Create Blog Audit Sheet').first().json.sheets[0].properties.title }}!A:Z"
},
{
"name": "values",
"value": "={{ $json.data }}"
}
]
},
"queryParameters": {
"parameters": [
{
"name": "valueInputOption",
"value": "RAW"
}
]
},
"nodeCredentialType": "googleSheetsOAuth2Api"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "GaJqJHuS5mQxap7q",
"name": "Google Sheets account"
}
},
"typeVersion": 4.1
},
{
"id": "a1b24d76-a502-409a-b310-d4d3cc5e7c9a",
"name": "Dumpling AI : Explorer le site web",
"type": "n8n-nodes-base.httpRequest",
"position": [
1008,
0
],
"parameters": {
"url": "https://app.dumplingai.com/api/v1/crawl",
"method": "POST",
"options": {},
"sendBody": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $('Form Submission ').item.json[\"Client URL\"] }}"
},
{
"name": "limit",
"value": "=10"
}
]
},
"genericAuthType": "httpHeaderAuth"
},
"credentials": {
"httpHeaderAuth": {
"id": "RLFzAcGRepr5eXZB",
"name": "Dumpling AI-n8n"
}
},
"typeVersion": 4.2
},
{
"id": "bf50d679-1f16-4811-9e20-99eaccd9fc44",
"name": "Extract Blog URLs",
"type": "n8n-nodes-base.code",
"position": [
1232,
0
],
"parameters": {
"jsCode": "// Get all input items\nconst items = $input.all();\n\n// Convert everything to a string for searching\nconst fullDataString = JSON.stringify(items);\n\n// Find ALL URLs in the content\nconst urlPattern = /https?:\\/\\/[^\\s\\\"\\'<>\\(\\)\\[\\]]+/gi;\nconst allUrls = fullDataString.match(urlPattern) || [];\n\n// Clean and deduplicate URLs\nlet uniqueUrls = [...new Set(allUrls.map(url => \n url.replace(/[\\(\\)\\[\\]\\\"\\'<>\\\\]/g, '').trim()\n))];\n\n// Function to determine if a URL might be a blog post\nfunction isPotentialBlogPost(url) {\n // Common blog URL patterns\n const blogPatterns = [\n /\\/blog\\//i,\n /\\/post\\//i,\n /\\/posts\\//i,\n /\\/article\\//i,\n /\\/articles\\//i,\n /\\/news\\//i,\n /\\/insights\\//i,\n /\\/stories\\//i,\n /\\/resources\\/blog/i,\n /\\/\\d{4}\\/\\d{2}\\//,\n /\\/how-to-/i,\n /\\/guide-to-/i,\n /\\/tips/i,\n /\\/free-printable-art/i\n ];\n \n // Exclude patterns\n const excludePatterns = [\n /\\.(jpg|jpeg|png|gif|svg|css|js|pdf|zip)$/i,\n /\\/wp-content\\//i,\n /\\/cart\\//i,\n /\\/checkout/i,\n /\\/product\\//i,\n /\\/shop\\//i,\n /\\?add-to-cart=/i,\n /\\/my-account/i,\n /\\/collections?\\//i\n ];\n \n // Check excludes first\n for (const pattern of excludePatterns) {\n if (pattern.test(url)) {\n return false;\n }\n }\n \n // Check blog patterns\n for (const pattern of blogPatterns) {\n if (pattern.test(url)) {\n return true;\n }\n }\n \n return false;\n}\n\n// Filter URLs\nconst blogUrls = uniqueUrls.filter(url => url.includes('/blog'));\nconst potentialBlogPosts = uniqueUrls.filter(isPotentialBlogPost);\n\n// Combine and deduplicate\nconst allBlogRelatedUrls = [...new Set([...blogUrls, ...potentialBlogPosts])];\n\n// Sort URLs\nallBlogRelatedUrls.sort();\n\n// Return results\nif (allBlogRelatedUrls.length > 0) {\n return allBlogRelatedUrls.map(url => ({\n json: {\n blogUrl: url\n }\n }));\n} else {\n return [{\n json: {\n message: \"No blog URLs found\",\n totalUrlsChecked: uniqueUrls.length\n }\n }];\n}"
},
"typeVersion": 2
},
{
"id": "cf4d1e28-6dd4-46f5-bdde-6882b9bc59d9",
"name": "Dumpling AI : Extraire les pages de blog",
"type": "n8n-nodes-base.httpRequest",
"position": [
1456,
0
],
"parameters": {
"url": "https://app.dumplingai.com/api/v1/scrape",
"method": "POST",
"options": {},
"sendBody": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.blogUrl }}"
}
]
},
"genericAuthType": "httpHeaderAuth"
},
"credentials": {
"httpHeaderAuth": {
"id": "RLFzAcGRepr5eXZB",
"name": "Dumpling AI-n8n"
}
},
"typeVersion": 4.2
},
{
"id": "d5135eaa-34f2-4d9e-afa9-5dd4a98b658b",
"name": "Préparer les données de ligne",
"type": "n8n-nodes-base.set",
"position": [
1680,
0
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "ce53a00e-e3ff-49b8-a867-2b2005655946",
"name": "Url",
"type": "string",
"value": "={{ $('Form Submission ').item.json[\"Client URL\"] }}"
},
{
"id": "8d2cc248-a6da-44a2-a22a-865e13f9d15c",
"name": "Crawled_pages",
"type": "string",
"value": "={{ $('Extract Blog URLs').item.json.blogUrl }}"
},
{
"id": "4a3c75d6-fa9e-4cb5-84b0-3fb33f38bd45",
"name": "website_content",
"type": "string",
"value": "={{ $json.content }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "7fb2c810-f438-48af-a075-29e514d6855c",
"name": "Enregistrer les données du blog dans Google Sheets",
"type": "n8n-nodes-base.googleSheets",
"position": [
1904,
0
],
"parameters": {
"columns": {
"value": {},
"schema": [
{
"id": "Url",
"type": "string",
"display": true,
"required": false,
"displayName": "Url",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Crawled_pages",
"type": "string",
"display": true,
"required": false,
"displayName": "Crawled_pages",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "website_content",
"type": "string",
"display": true,
"required": false,
"displayName": "website_content",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "autoMapInputData",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "id",
"value": "={{ $('Create Blog Audit Sheet').item.json.sheets[0].properties.sheetId }}"
},
"documentId": {
"__rl": true,
"mode": "url",
"value": "={{ $('Create Blog Audit Sheet').item.json.spreadsheetUrl }}"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "GaJqJHuS5mQxap7q",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "bbd3cb7c-b1a5-4919-8617-9050dabdde20",
"name": "Note autocollante",
"type": "n8n-nodes-base.stickyNote",
"position": [
-128,
-304
],
"parameters": {
"width": 880,
"height": 448,
"content": "## Workflow Overview\n\n1. **Trigger: Form Submission (Client URL)** — Starts the workflow when a client URL is entered. \n2. **Create Blog Audit Sheet** — Creates a new Google Sheet for the audit. \n3. **Set Sheet Headers** — Defines the columns (URL, Crawled Pages, Website Content). \n4. **Format Header Row** — Prepares the headers into the right format for Google Sheets. \n5. **Insert Headers into Sheet** — Updates the sheet with the headers. \n6. **Dumpling AI: Crawl Website** — Crawls the submitted URL to discover pages. \n7. **Extract Blog URLs** — Filters the crawl results to keep only blog-related links. \n8. **Dumpling AI: Scrape Blog Pages** — Scrapes the content from each blog page. \n9. **Prepare Row Data** — Maps the URL, crawled page, and content into structured fields. \n10. **Save Blog Data to Google Sheets** — Appends the results into the audit sheet for review.\n\n"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {
"Form Submission ": [
{
"json": {
"formMode": "test",
"Client URL": "https://crane-baby.com/",
"submittedAt": "2025-08-23T19:14:17.927+10:00"
}
}
],
"Extract Blog URLs": [
{
"json": {
"blogUrl": "https://crane-baby.com/blog"
}
},
{
"json": {
"blogUrl": "https://crane-baby.com/blog/"
}
},
{
"json": {
"blogUrl": "https://crane-baby.com/free-printable-art/"
}
}
],
"Set Sheet Headers": [
{
"json": {
"rows": "Url,Crawled_pages,website_content",
"sheets": [
{
"properties": {
"index": 0,
"title": "Blog content audit",
"sheetId": 618343398,
"sheetType": "GRID",
"gridProperties": {
"rowCount": 1000,
"columnCount": 26
}
}
}
],
"properties": {
"title": "https://crane-baby",
"locale": "en_US",
"timeZone": "Etc/GMT",
"autoRecalc": "ON_CHANGE",
"defaultFormat": {
"padding": {
"top": 2,
"left": 3,
"right": 3,
"bottom": 2
},
"textFormat": {
"bold": false,
"italic": false,
"fontSize": 10,
"underline": false,
"fontFamily": "arial,sans,sans-serif",
"strikethrough": false,
"foregroundColor": {},
"foregroundColorStyle": {
"rgbColor": {}
}
},
"wrapStrategy": "OVERFLOW_CELL",
"backgroundColor": {
"red": 1,
"blue": 1,
"green": 1
},
"verticalAlignment": "BOTTOM",
"backgroundColorStyle": {
"rgbColor": {
"red": 1,
"blue": 1,
"green": 1
}
}
},
"spreadsheetTheme": {
"themeColors": [
{
"color": {
"rgbColor": {}
},
"colorType": "TEXT"
},
{
"color": {
"rgbColor": {
"red": 1,
"blue": 1,
"green": 1
}
},
"colorType": "BACKGROUND"
},
{
"color": {
"rgbColor": {
"red": 0.25882354,
"blue": 0.95686275,
"green": 0.52156866
}
},
"colorType": "ACCENT1"
},
{
"color": {
"rgbColor": {
"red": 0.91764706,
"blue": 0.20784314,
"green": 0.2627451
}
},
"colorType": "ACCENT2"
},
{
"color": {
"rgbColor": {
"red": 0.9843137,
"blue": 0.015686275,
"green": 0.7372549
}
},
"colorType": "ACCENT3"
},
{
"color": {
"rgbColor": {
"red": 0.20392157,
"blue": 0.3254902,
"green": 0.65882355
}
},
"colorType": "ACCENT4"
},
{
"color": {
"rgbColor": {
"red": 1,
"blue": 0.003921569,
"green": 0.42745098
}
},
"colorType": "ACCENT5"
},
{
"color": {
"rgbColor": {
"red": 0.27450982,
"blue": 0.7764706,
"green": 0.7411765
}
},
"colorType": "ACCENT6"
},
{
"color": {
"rgbColor": {
"red": 0.06666667,
"blue": 0.8,
"green": 0.33333334
}
},
"colorType": "LINK"
}
],
"primaryFontFamily": "Arial"
}
},
"spreadsheetId": "1f3TscxV2YsGuUIfi-VfF4otWOjZZaO6SXH5ToHvDdjQ",
"spreadsheetUrl": "https://docs.google.com/spreadsheets/d/1f3TscxV2YsGuUIfi-VfF4otWOjZZaO6SXH5ToHvDdjQ/edit"
}
}
],
"Create Blog Audit Sheet": [
{
"json": {
"sheets": [
{
"properties": {
"index": 0,
"title": "Blog content audit",
"sheetId": 618343398,
"sheetType": "GRID",
"gridProperties": {
"rowCount": 1000,
"columnCount": 26
}
}
}
],
"properties": {
"title": "https://crane-baby",
"locale": "en_US",
"timeZone": "Etc/GMT",
"autoRecalc": "ON_CHANGE",
"defaultFormat": {
"padding": {
"top": 2,
"left": 3,
"right": 3,
"bottom": 2
},
"textFormat": {
"bold": false,
"italic": false,
"fontSize": 10,
"underline": false,
"fontFamily": "arial,sans,sans-serif",
"strikethrough": false,
"foregroundColor": {},
"foregroundColorStyle": {
"rgbColor": {}
}
},
"wrapStrategy": "OVERFLOW_CELL",
"backgroundColor": {
"red": 1,
"blue": 1,
"green": 1
},
"verticalAlignment": "BOTTOM",
"backgroundColorStyle": {
"rgbColor": {
"red": 1,
"blue": 1,
"green": 1
}
}
},
"spreadsheetTheme": {
"themeColors": [
{
"color": {
"rgbColor": {}
},
"colorType": "TEXT"
},
{
"color": {
"rgbColor": {
"red": 1,
"blue": 1,
"green": 1
}
},
"colorType": "BACKGROUND"
},
{
"color": {
"rgbColor": {
"red": 0.25882354,
"blue": 0.95686275,
"green": 0.52156866
}
},
"colorType": "ACCENT1"
},
{
"color": {
"rgbColor": {
"red": 0.91764706,
"blue": 0.20784314,
"green": 0.2627451
}
},
"colorType": "ACCENT2"
},
{
"color": {
"rgbColor": {
"red": 0.9843137,
"blue": 0.015686275,
"green": 0.7372549
}
},
"colorType": "ACCENT3"
},
{
"color": {
"rgbColor": {
"red": 0.20392157,
"blue": 0.3254902,
"green": 0.65882355
}
},
"colorType": "ACCENT4"
},
{
"color": {
"rgbColor": {
"red": 1,
"blue": 0.003921569,
"green": 0.42745098
}
},
"colorType": "ACCENT5"
},
{
"color": {
"rgbColor": {
"red": 0.27450982,
"blue": 0.7764706,
"green": 0.7411765
}
},
"colorType": "ACCENT6"
},
{
"color": {
"rgbColor": {
"red": 0.06666667,
"blue": 0.8,
"green": 0.33333334
}
},
"colorType": "LINK"
}
],
"primaryFontFamily": "Arial"
}
},
"spreadsheetId": "1f3TscxV2YsGuUIfi-VfF4otWOjZZaO6SXH5ToHvDdjQ",
"spreadsheetUrl": "https://docs.google.com/spreadsheets/d/1f3TscxV2YsGuUIfi-VfF4otWOjZZaO6SXH5ToHvDdjQ/edit"
}
}
],
"Insert Headers into Sheet": [
{
"json": {
"updatedRows": 1,
"updatedCells": 3,
"updatedRange": "'Blog content audit'!A1:C1",
"spreadsheetId": "1f3TscxV2YsGuUIfi-VfF4otWOjZZaO6SXH5ToHvDdjQ",
"updatedColumns": 3
}
}
]
},
"settings": {
"executionOrder": "v1"
},
"versionId": "319cd2cf-cb58-48ba-80dd-88c67a42fa8f",
"connections": {
"b9be7abb-d02c-4383-b18d-e40f77f833bf": {
"main": [
[
{
"node": "e4c0b49d-ec38-4357-ad7c-bb47145b0cd5",
"type": "main",
"index": 0
}
]
]
},
"d5135eaa-34f2-4d9e-afa9-5dd4a98b658b": {
"main": [
[
{
"node": "7fb2c810-f438-48af-a075-29e514d6855c",
"type": "main",
"index": 0
}
]
]
},
"bf50d679-1f16-4811-9e20-99eaccd9fc44": {
"main": [
[
{
"node": "cf4d1e28-6dd4-46f5-bdde-6882b9bc59d9",
"type": "main",
"index": 0
}
]
]
},
"3630f64c-a9df-4c18-a668-822b5d0aed00": {
"main": [
[
{
"node": "171ecc40-447d-4a64-adc8-f0d38d1d9cdf",
"type": "main",
"index": 0
}
]
]
},
"1138d43b-14bb-4f9c-a6dc-f82cc965e5c4": {
"main": [
[
{
"node": "3630f64c-a9df-4c18-a668-822b5d0aed00",
"type": "main",
"index": 0
}
]
]
},
"e4c0b49d-ec38-4357-ad7c-bb47145b0cd5": {
"main": [
[
{
"node": "1138d43b-14bb-4f9c-a6dc-f82cc965e5c4",
"type": "main",
"index": 0
}
]
]
},
"171ecc40-447d-4a64-adc8-f0d38d1d9cdf": {
"main": [
[
{
"node": "a1b24d76-a502-409a-b310-d4d3cc5e7c9a",
"type": "main",
"index": 0
}
]
]
},
"a1b24d76-a502-409a-b310-d4d3cc5e7c9a": {
"main": [
[
{
"node": "bf50d679-1f16-4811-9e20-99eaccd9fc44",
"type": "main",
"index": 0
}
]
]
},
"cf4d1e28-6dd4-46f5-bdde-6882b9bc59d9": {
"main": [
[
{
"node": "d5135eaa-34f2-4d9e-afa9-5dd4a98b658b",
"type": "main",
"index": 0
}
]
]
}
}
}
Comment utiliser ce workflow ?
Copiez le code de configuration JSON ci-dessus, créez un nouveau workflow dans votre instance n8n et sélectionnez "Importer depuis le JSON", collez la configuration et modifiez les paramètres d'authentification selon vos besoins.
Dans quels scénarios ce workflow est-il adapté ?
Intermédiaire - Étude de marché, IA Multimodale
Est-ce payant ?
Ce workflow est entièrement gratuit et peut être utilisé directement. Veuillez noter que les services tiers utilisés dans le workflow (comme l'API Dumpling AI) peuvent nécessiter un paiement de votre part.
Workflows recommandés
Yang
@yang
Partager ce workflow