Extrae sitios web y responde preguntas usando GPT-5 nano y Google Sheets
Este es un flujo de automatización del dominio Market Research, Multimodal AI que contiene 44 nodos. Utiliza principalmente nodos como If, Set, Xml, Code, Markdown. Usa GPT-5 nano y Google Sheets para extraer sitios web y responder preguntas.
- Pueden requerirse credenciales de autenticación para la API de destino
- Credenciales de API de Google Sheets
- Clave de API de OpenAI
Nodos utilizados (44)
{
"meta": {
"instanceId": "3dd9efe937707b07af3ede5b46321ec0e2a9e49d7ef201e274c4c4aa1a4615a6",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"name": "Agente de IA",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
-2432,
272
],
"parameters": {
"text": "={{ $node[\"Chat web\"].json[\"chatInput\"] }}",
"options": {
"systemMessage": "Actúas como una página web, mediante la tool sheet tienes acceso a toda la página web y todo lo que te pida el usuario puedes consultarlo allí, responde al usuario en base a la info de allí"
},
"promptType": "define"
},
"typeVersion": 2.2
},
{
"id": "6dc738b3-4ebe-4f5b-b8b3-decf9ce15e70",
"name": "Modelo de chat OpenAI",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-2496,
480
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-5-nano",
"cachedResultName": "gpt-5-nano"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "0461df33-2d2f-42e2-a0d3-288bd78275f1",
"name": "Memoria simple",
"type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
"position": [
-2336,
480
],
"parameters": {
"contextWindowLength": 50
},
"typeVersion": 1.3
},
{
"id": "fdcce6e6-f00f-4f84-ac6e-2e181452d3ac",
"name": "Obtener fila(s) en hoja en Google Sheets",
"type": "n8n-nodes-base.googleSheetsTool",
"position": [
-1968,
464
],
"parameters": {
"options": {},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "621aa928-83c5-48a4-8488-67c58fa1aec8",
"name": "Si",
"type": "n8n-nodes-base.if",
"position": [
-3376,
560
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "0adf46cd-5ca1-418e-a8b8-0571240e0efb",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $json['Data schema'] }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "8df9234a-85ad-45b4-bc17-ad64edaab08a",
"name": "Mapeo de mapa del sitio",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-1536,
736
],
"parameters": {
"url": "={{ $json.sitemapUrl }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "={{ $json.userAgent }}"
},
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "0d18ebca-52f3-46ed-934c-44c9bad53dab",
"name": "XML1",
"type": "n8n-nodes-base.xml",
"position": [
-1088,
960
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "39127cf7-f627-4fca-b1b7-c51b3656947d",
"name": "UA Rotativo1",
"type": "n8n-nodes-base.code",
"position": [
-2160,
736
],
"parameters": {
"jsCode": "const userAgents = [\n // Escritorio - Windows\n \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36\",\n \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0\",\n \"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36\",\n\n // Escritorio - Mac\n \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.1 Safari/605.1.15\",\n \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\",\n\n // Móvil - Android\n \"Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36\",\n \"Mozilla/5.0 (Linux; Android 9; Mi 9T Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36\",\n\n // Móvil - iPhone\n \"Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1\",\n \"Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Mobile/15E148 Safari/604.1\"\n];\n\n// Escoge uno aleatorio\nconst randomUA = userAgents[Math.floor(Math.random() * userAgents.length)];\n\nreturn [\n {\n json: {\n userAgent: randomUA\n }\n }\n];\n"
},
"typeVersion": 2
},
{
"id": "4260d45a-8705-483a-b17f-58211512ba59",
"name": "Error de solicitud",
"type": "n8n-nodes-base.stopAndError",
"position": [
-1712,
592
],
"parameters": {
"errorMessage": "URL mal introducida, debes introducir con el siguiente formato: ejemplo.com"
},
"typeVersion": 1
},
{
"id": "59d0fe0a-9e27-4755-ac23-f46fa6d2aa95",
"name": "Error de mapa del sitio",
"type": "n8n-nodes-base.stopAndError",
"position": [
-1088,
528
],
"parameters": {
"errorMessage": "Sitemap no encontrado o acceso bloqueado"
},
"executeOnce": false,
"typeVersion": 1
},
{
"id": "0526a778-8d63-4dcc-9815-a002ffd70a7f",
"name": "Solicitar robots.txt",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-1920,
736
],
"parameters": {
"url": "={{ $node[\"Agente de IA1\"].json[\"output\"][\"URL\"] }}/robots.txt",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "={{ $json.userAgent }}"
},
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "5948d577-4aea-4394-9b20-687f44efe5c8",
"name": "Extraer URL del mapa del sitio",
"type": "n8n-nodes-base.code",
"position": [
-1712,
736
],
"parameters": {
"jsCode": "// Simulación: contenido del robots.txt como string, en n8n será $input o $json dependiendo de tu configuración\nconst robotsTxtContent = $input.first().json.data || \"\"; // Cambia esto por la variable correcta en n8n\n\n// Función para extraer URL del sitemap\nfunction extractSitemapUrl(robotsTxt) {\n // Buscamos línea que empiece con \"Sitemap:\" (ignorando mayúsculas y espacios)\n const lines = robotsTxt.split(/\\r?\\n/);\n for (const line of lines) {\n const match = line.match(/^\\s*Sitemap:\\s*(.+)$/i);\n if (match) {\n return match[1].trim();\n }\n }\n return null; // No encontrado\n}\n\nconst sitemapUrl = extractSitemapUrl(robotsTxtContent);\n\n// Devolver JSON con la URL del sitemap (o null si no hay)\nreturn [{ json: { sitemapUrl } }];"
},
"typeVersion": 2
},
{
"id": "c12d6ae7-23ee-4f7a-9a33-7e43d1e475b9",
"name": "OPTIONS",
"type": "n8n-nodes-base.set",
"position": [
-2400,
736
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "71b9ad22-d418-4fff-92bb-dafd0818575d",
"name": "scan_pages",
"type": "boolean",
"value": true
},
{
"id": "42483a05-34f0-4cef-b404-dae43a7bee22",
"name": "scan_posts",
"type": "boolean",
"value": false
},
{
"id": "00a5ed31-dd44-4f9f-97f1-7aa4fe636afd",
"name": "category",
"type": "boolean",
"value": false
},
{
"id": "a2b0930f-8a9b-4f78-8d20-466366853b55",
"name": "tags",
"type": "boolean",
"value": false
}
]
}
},
"typeVersion": 3.4
},
{
"id": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"name": "Agente de IA1",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
-3024,
736
],
"parameters": {
"text": "={{ $node[\"Chat web\"].json[\"chatInput\"] }}",
"options": {
"systemMessage": "Responde en formato JSON, el url si lo es, si no lo es pon cualquier valor y con una boolean que se indica respondiendo si es url o no (true or false)"
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 2.2
},
{
"id": "afdbed61-346e-44a6-aa69-23a2b7ecf553",
"name": "Modelo de chat OpenAI1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-3024,
944
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-5-nano",
"cachedResultName": "gpt-5-nano"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "175a77cd-bd0a-4849-8c9b-d36b4ddcecd9",
"name": "Chat web",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
-3776,
560
],
"webhookId": "01764d18-dae5-4dff-8e99-cb90682e9187",
"parameters": {
"public": true,
"options": {
"responseMode": "responseNodes"
},
"authentication": "basicAuth"
},
"credentials": {
"httpBasicAuth": {
"id": "PkymFgJgUnBzIwMu",
"name": "Unnamed credential"
}
},
"typeVersion": 1.3
},
{
"id": "16ef0fa6-4259-43bf-b74f-3dc70d4b54e3",
"name": "Analizador de salida estructurada",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
-2880,
944
],
"parameters": {
"jsonSchemaExample": "{\n \"URL\": \"example.com\",\n \"URL_bool\":true\n}"
},
"typeVersion": 1.3
},
{
"id": "15992fbe-4ee5-4630-a377-f1b8d21ebc1b",
"name": "Si1",
"type": "n8n-nodes-base.if",
"position": [
-2640,
752
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "3851cb51-a282-4388-b4f6-1c1f68e8c7c5",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $json.output.URL_bool }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "ab8d3076-4420-48ef-b8fa-e25adbbd11e2",
"name": "Responder al chat",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
-2400,
928
],
"parameters": {
"message": "Debes introducir una URL válida ejemplo: https://google.es",
"options": {}
},
"typeVersion": 1
},
{
"id": "c5a8dd49-3a82-45c7-a139-b30b4cc21e05",
"name": "Responder al chat1",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
-2080,
272
],
"parameters": {
"message": "={{ $json.output }}",
"options": {}
},
"typeVersion": 1
},
{
"id": "d665823a-b40a-45a5-ac12-0a789c1b8ecd",
"name": "Enviar mensaje a un modelo",
"type": "@n8n/n8n-nodes-langchain.openAi",
"position": [
-3024,
1280
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "gpt-4o",
"cachedResultName": "GPT-4O"
},
"options": {},
"messages": {
"values": [
{
"role": "system",
"content": "=De aquí saca y devuelve en formato JSON, los siguientes urls de los sitemap que sean true: \n\nPages: {{ $('OPTIONS').item.json.scan_pages }}\n\nPosts: {{ $('OPTIONS').item.json.scan_posts }}\n\nCategorias: {{ $('OPTIONS').item.json.category }}\n\nTags: {{ $('OPTIONS').item.json.tags }}\n\nSalida:\n\n{\n\"sitemap_page\":\"https://...\",\n\"sitemap_posts\":\"https://\"\n}"
},
{
"content": "=Sitemap: \n{{ $json.sitemapindex.sitemap[0].loc }}\n\n{{ $json.sitemapindex.sitemap[1].loc }}\n\n{{ $json.sitemapindex.sitemap[2].loc }}"
}
]
},
"jsonOutput": true
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.8
},
{
"id": "d71fbfb6-3e9b-427b-afe3-6fd77ff77ede",
"name": "XML",
"type": "n8n-nodes-base.xml",
"position": [
-2480,
1280
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "6578bcc5-b412-46bf-88d5-8b285372e9b9",
"name": "Iterar sobre elementos",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1856,
1280
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "25c7cbaf-7eb9-4e71-a488-b6d16242d324",
"name": "Enviar mensaje a un modelo1",
"type": "@n8n/n8n-nodes-langchain.openAi",
"position": [
-1200,
1408
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "gpt-5-nano",
"cachedResultName": "GPT-5-NANO"
},
"options": {},
"messages": {
"values": [
{
"role": "system",
"content": "El usuario te mandará el contenido de la página web, tu misión es sacar un resumen de la página web, idioma de la página, h1, enlaces internos (no imágenes ni css ni js) y enlaces externos y añadirlos mediante la tool sheet a la db"
},
{
"content": "=URL: {{ $('Dividir URLs').item.json.urls }}\n\n{{ $json.data }}"
}
]
}
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.8
},
{
"id": "6fb7c3fa-7851-49cd-8d0b-01df74a80f35",
"name": "Añadir fila en hoja en Google Sheets",
"type": "n8n-nodes-base.googleSheetsTool",
"position": [
-1056,
1648
],
"parameters": {
"columns": {
"value": {
"Lang": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Lang', ``, 'string') }}",
"Page URL": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Page_URL', ``, 'string') }}",
"External URLs": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('External_URLs', ``, 'string') }}",
"Internal URLs": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Internal_URLs', ``, 'string') }}",
"Summary Content": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Summary_Content', ``, 'string') }}",
"H1 and hierarchy": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('H1_and_hierarchy', ``, 'string') }}"
},
"schema": [
{
"id": "Page URL",
"type": "string",
"display": true,
"required": false,
"displayName": "Page URL",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Content text",
"type": "string",
"display": true,
"required": false,
"displayName": "Content text",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Lang",
"type": "string",
"display": true,
"required": false,
"displayName": "Lang",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "H1 and hierarchy",
"type": "string",
"display": true,
"required": false,
"displayName": "H1 and hierarchy",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "External URLs",
"type": "string",
"display": true,
"required": false,
"displayName": "External URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Internal URLs",
"type": "string",
"display": true,
"required": false,
"displayName": "Internal URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Summary Content",
"type": "string",
"display": true,
"required": false,
"displayName": "Summary Content",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Data schema",
"type": "string",
"display": true,
"required": false,
"displayName": "Data schema",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "9a5d11a2-0fc8-48a1-8fa0-c2f53fb49b54",
"name": "Completado",
"type": "n8n-nodes-base.googleSheets",
"position": [
-1616,
1168
],
"parameters": {
"columns": {
"value": {
"Data schema": "={{true}}"
},
"schema": [
{
"id": "Page URL",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Page URL",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Content text",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Content text",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Lang",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Lang",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "H1 and hierarchy",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "H1 and hierarchy",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "External URLs",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "External URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Internal URLs",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Internal URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Summary Content",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Summary Content",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Data schema",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Data schema",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"Data schema"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "appendOrUpdate",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "c371c8db-e752-48fa-999d-4813aeb13f38",
"name": "Solicitud HTTP2",
"type": "n8n-nodes-base.httpRequestTool",
"position": [
-2176,
480
],
"parameters": {
"url": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('URL', ``, 'string') }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "40169b8e-5948-4422-98d9-4bca87ccab73",
"name": "Combinar",
"type": "n8n-nodes-base.code",
"position": [
-2272,
1280
],
"parameters": {
"jsCode": "// Obtenemos el array de URLs del JSON\nconst urlsArray = $input.first().json.urlset.url;\n\n// Creamos un objeto donde cada clave es \"url 1\", \"url 2\", etc.\nconst result = {};\nurlsArray.forEach((item, index) => {\n if (item.loc) {\n result[`url ${index + 1}`] = item.loc;\n }\n});\n\n// Devolvemos el objeto\nreturn [\n {\n json: {\n urls: result\n }\n }\n];\n"
},
"typeVersion": 2
},
{
"id": "5d98fe9e-890c-4c9f-81c8-309cc23dc8af",
"name": "Dividir URLs",
"type": "n8n-nodes-base.splitOut",
"position": [
-2064,
1280
],
"parameters": {
"options": {},
"fieldToSplitOut": "urls"
},
"typeVersion": 1
},
{
"id": "98abaa2b-ddbc-4c04-830e-d7112a6a57e2",
"name": "Solicitar URL",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
-1616,
1408
],
"parameters": {
"url": "={{ $('Dividir URLs').item.json.urls }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "={{ $json.userAgent }}"
},
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "71d974a6-4f60-4573-be09-7cbb09502fa3",
"name": "HTML a Markdown",
"type": "n8n-nodes-base.markdown",
"position": [
-1408,
1408
],
"parameters": {
"html": "={{ $json.data }}",
"options": {}
},
"typeVersion": 1
},
{
"id": "f076a729-8f40-4a3b-ad32-83837964c42c",
"name": "Mapeo de mapas del sitio",
"type": "n8n-nodes-base.httpRequest",
"position": [
-2672,
1280
],
"parameters": {
"url": "={{ $json.message.content.sitemap_page }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "bca3322d-bc2e-4932-a5f2-a2e9548a8aef",
"name": "Obtener esquema de datos",
"type": "n8n-nodes-base.googleSheets",
"maxTries": 5,
"position": [
-3568,
560
],
"parameters": {
"options": {},
"filtersUI": {
"values": [
{
"lookupValue": "={{ true }}",
"lookupColumn": "Data schema"
}
]
},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"retryOnFail": true,
"typeVersion": 4.6,
"alwaysOutputData": true,
"waitBetweenTries": 3000
},
{
"id": "d051d2f3-cc65-4cb8-8a67-90d7df0dda08",
"name": "Nota adhesiva",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3568,
736
],
"parameters": {
"width": 256,
"height": 176,
"content": "## Document example URL \nhttps://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=sharing"
},
"typeVersion": 1
},
{
"id": "2ffd406b-1b1b-44d6-be7e-5bbdf73ad5d0",
"name": "Nota adhesiva1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3776,
240
],
"parameters": {
"color": 5,
"width": 496,
"height": 288,
"content": "## Overview\nThis is a web consultation chat workflow that, on the first run with a given URL, discovers the sitemap, crawls the site, extracts useful information (language, H1 hierarchy, internal/external links, summary) and stores it in Google Sheets.\n\nFrom then on, if the “schema” flag is set in the sheet (Data schema = true), the chat switches to an Agent mode that responds to the user “as if it were the website,” consulting the database (Google Sheets) and making controlled HTTP requests when needed.\n\n"
},
"typeVersion": 1
},
{
"id": "303ec015-8a82-4092-9dd9-46bb7658a1d3",
"name": "Nota adhesiva2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-864,
32
],
"parameters": {
"width": 992,
"height": 1104,
"content": "## 1) Chat trigger & schema check\n\n* **Chat web (trigger)** – Public webhook with Basic Auth. Captures `chatInput`.\n* **Get data schema (Google Sheets)** – Filters rows where **Data schema = true**.\n* **If**:\n\n * If schema exists → **Branch A (Agent mode with existing data)**.\n * If not → **Branch B (URL validation & initial crawling)**.\n\n---\n\n## 2) Branch A — Agent mode (consults an already indexed site)\n\n* **AI Agent** (LangChain Agent):\n\n * *System*: “You act as a website… use the tool sheet to access all site info.”\n * **Connected tools**:\n\n * **Get row(s) in sheet in Google Sheets** – lets the agent read the database.\n * **HTTP Request2** – allows the agent to fetch a URL it generates via `$fromAI('URL')`.\n * **OpenAI Chat Model (gpt-5-nano)** – LLM powering the agent.\n * **Simple Memory** – short-term context window (50 messages).\n* **Respond to Chat1** – Sends the agent’s `output` back to the user.\n\n**Purpose**: The user can ask questions (“What’s on page X?”, “What links are there?”), and the agent answers using the sheet’s stored data and, if necessary, live HTTP fetches.\n\n---\n\n## 3) Branch B — URL validation & crawl preparation\n\n* **AI Agent1** (URL classifier):\n\n * *System*: “Return JSON with `URL` and `URL_bool` (true if it’s a valid URL).”\n * **OpenAI Chat Model1 (gpt-5-nano)** + **Structured Output Parser** enforce JSON.\n* **If1**:\n\n * If `URL_bool = true` → continue.\n * If `false` → **Respond to Chat** (“You must enter a valid URL…”).\n* **OPTIONS (Set)**: Flags to choose which sitemaps to process:\n\n * `scan_pages: true`, `scan_posts/category/tags: false` (pages only).\n* **UA Rotativo1 (Code)**: Selects a **random User-Agent** (desktop/mobile, Win/Mac/iOS/Android) to reduce blocking.\n* **Req robots (HTTP Request)**: Downloads `robots.txt` from `{{ AI Agent1.output.URL }}/robots.txt` with realistic headers (language, compression, referer, etc.). 
If it fails, goes to **Req Error** (“URL not valid…”).\n* **extract sitemap url (Code)**: Parses `robots.txt` and **extracts the `Sitemap:` line** → `sitemapUrl`.\n* **Maping Sitemap (HTTP Request)**: Downloads the **sitemap index** (`sitemapindex`) with error handling (failure → **Sitemap Error**).\n* **XML1 (XML→JSON)**: Converts the sitemap XML to JSON.\n\n"
},
"typeVersion": 1
},
{
"id": "f37eac9a-5e47-45a3-a1ba-e65ebb312571",
"name": "Nota adhesiva3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3968,
560
],
"parameters": {
"width": 150,
"height": 96,
"content": "# P1"
},
"typeVersion": 1
},
{
"id": "8a2b47b5-dd9c-4f20-b76f-437446d0d0c6",
"name": "Nota adhesiva4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2720,
288
],
"parameters": {
"width": 166,
"height": 272,
"content": "\n\n\n\n\n\n\n\n# P2"
},
"typeVersion": 1
},
{
"id": "d63fd3cc-2966-4460-8de0-8b871d6f2e78",
"name": "Nota adhesiva5",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3248,
736
],
"parameters": {
"width": 150,
"height": 320,
"content": "\n\n\n\n\n\n\n\n\n\n\n# P3"
},
"typeVersion": 1
},
{
"id": "059d0c29-58d7-4b75-9ec4-89d8b1b8e54b",
"name": "Nota adhesiva6",
"type": "n8n-nodes-base.stickyNote",
"position": [
-848,
1200
],
"parameters": {
"width": 992,
"height": 1248,
"content": "## 4) Selecting the specific sitemap (pages)\n\n* **Message a model (GPT-4o)**:\n\n * *System*: Ask the model to **select and return in JSON** the relevant sitemaps based on the flags (`scan_pages`, `scan_posts`, etc.).\n * *User*: Passes the first 3 `loc` entries from `sitemapindex`.\n * **Expected output**: e.g. `{ \"sitemap_page\": \"https://…\" }`.\n* **Maping Sitemaps (HTTP Request)**: Downloads the **pages sitemap** provided by the model.\n* **XML (XML→JSON)**: Converts it to JSON (`urlset.url`).\n\n---\n\n## 5) Expanding URLs & page-by-page processing loop\n\n* **Merge (Code)**: Turns `urlset.url` into an object `{ urls: { \"url 1\": \"...\", \"url 2\": \"...\" } }`.\n* **Split URLs**: Breaks that object into **one item per URL**.\n* **Loop Over Items (SplitInBatches)**: Iterates through each URL (supports batching). From here, two parallel flows run:\n\n 1. **Req URL (HTTP Request)** → **HTML to Markdown** → **Message a model1 (gpt-5-nano)**:\n\n * *System*: “You will receive page content; extract: summary, language, H1/hierarchy, internal links (no CSS/JS/images), external links; then add them to the DB via the sheet tool.”\n * *User*: `{{ $json.data }}` (HTML converted to Markdown).\n * **Append row in sheet in Google Sheets (Tool)** is connected as an **AI tool** and uses `$fromAI(...)` mappings to fill columns:\n\n * **Lang**, **H1 and hierarchy**, **External URLs**, **Internal URLs**, **Summary Content**.\n * **Result**: Adds one row per page with extracted fields.\n 2. **Complete (Google Sheets, appendOrUpdate)**:\n\n * Marks/ensures a row with **`Data schema = true`** exists (acts as a “ready” flag so future runs go into **Agent mode**).\n\n---\n\n## 6) What gets stored in Google Sheets\n\nSheet: **“Web”** (gid=0). 
Columns in the schema:\n\n* **Lang** – detected language.\n* **H1 and hierarchy** – H1 and heading hierarchy.\n* **External URLs** – outbound links.\n* **Internal URLs** – valid internal links.\n* **Summary Content** – page summary.\n* **Data schema** – boolean flag controlling the flow mode.\n\n> Note: In the current mapping of “Append row…”, “Page URL” and the five content fields above (Lang through Summary Content) are populated via `$fromAI`. “Content text” exists in the schema but is not currently mapped (you could add it by requesting it from the LLM and using `$fromAI`).\n\n\n\n## 7) Models & memory\n\n* **gpt-5-nano** – for URL classification, per-page structured extraction, and main Agent responses when data exists.\n* **GPT-4o** – for sitemap selection from the index.\n* **Memory** – 50-message context window for short-term chat continuity in Agent mode.\n"
},
"typeVersion": 1
},
{
"id": "33d198cc-058e-4935-9e49-adc77baf654b",
"name": "Nota adhesiva7",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2144,
912
],
"parameters": {
"color": 3,
"width": 608,
"height": 192,
"content": "## User experience per case\n\n* **No valid URL** – Returns a message requesting a sample URL.\n* **First time with valid URL** – No data yet; runs **discovery → crawling → extraction → save to Sheets**.\n* **Subsequent interactions** (with `Data schema = true`) – **Agent** answers as if it were the website, using the **DB in Sheets** and live HTTP when needed.\n"
},
"typeVersion": 1
},
{
"id": "670ecb31-9a8d-4d13-aa94-066463f91e6a",
"name": "Nota adhesiva8",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3248,
1136
],
"parameters": {
"width": 150,
"height": 320,
"content": "\n\n\n\n\n\n\n\n\n\n\n# P4"
},
"typeVersion": 1
},
{
"id": "c0bffbee-cf0d-4abf-99a0-dbbc5347c08d",
"name": "Nota adhesiva9",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2096,
1440
],
"parameters": {
"width": 150,
"height": 320,
"content": "\n\n\n\n\n\n\n\n\n\n\n## P5 & P6"
},
"typeVersion": 1
},
{
"id": "de0a9316-6130-4272-bce1-db37039e9c3d",
"name": "Nota adhesiva10",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3968,
720
],
"parameters": {
"color": 5,
"width": 272,
"height": 144,
"content": "## Node By OXSR\nMore info and nodes\nhttps://n8n.io/creators/oxsr11/\n\nGit: https://github.com/oxsr"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"621aa928-83c5-48a4-8488-67c58fa1aec8": {
"main": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "main",
"index": 0
}
],
[
{
"node": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"type": "main",
"index": 0
}
]
]
},
"15992fbe-4ee5-4630-a377-f1b8d21ebc1b": {
"main": [
[
{
"node": "c12d6ae7-23ee-4f7a-9a33-7e43d1e475b9",
"type": "main",
"index": 0
}
],
[
{
"node": "ab8d3076-4420-48ef-b8fa-e25adbbd11e2",
"type": "main",
"index": 0
}
]
]
},
"d71fbfb6-3e9b-427b-afe3-6fd77ff77ede": {
"main": [
[
{
"node": "40169b8e-5948-4422-98d9-4bca87ccab73",
"type": "main",
"index": 0
}
]
]
},
"0d18ebca-52f3-46ed-934c-44c9bad53dab": {
"main": [
[
{
"node": "d665823a-b40a-45a5-ac12-0a789c1b8ecd",
"type": "main",
"index": 0
}
]
]
},
"40169b8e-5948-4422-98d9-4bca87ccab73": {
"main": [
[
{
"node": "5d98fe9e-890c-4c9f-81c8-309cc23dc8af",
"type": "main",
"index": 0
}
]
]
},
"c12d6ae7-23ee-4f7a-9a33-7e43d1e475b9": {
"main": [
[
{
"node": "39127cf7-f627-4fca-b1b7-c51b3656947d",
"type": "main",
"index": 0
}
]
]
},
"98abaa2b-ddbc-4c04-830e-d7112a6a57e2": {
"main": [
[
{
"node": "71d974a6-4f60-4573-be09-7cbb09502fa3",
"type": "main",
"index": 0
}
]
]
},
"05174bb2-efd5-4de8-9e17-26c2a85eff06": {
"main": [
[
{
"node": "c5a8dd49-3a82-45c7-a139-b30b4cc21e05",
"type": "main",
"index": 0
}
]
]
},
"175a77cd-bd0a-4849-8c9b-d36b4ddcecd9": {
"main": [
[
{
"node": "bca3322d-bc2e-4932-a5f2-a2e9548a8aef",
"type": "main",
"index": 0
}
]
]
},
"2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae": {
"main": [
[
{
"node": "15992fbe-4ee5-4630-a377-f1b8d21ebc1b",
"type": "main",
"index": 0
}
]
]
},
"0526a778-8d63-4dcc-9815-a002ffd70a7f": {
"main": [
[
{
"node": "5948d577-4aea-4394-9b20-687f44efe5c8",
"type": "main",
"index": 0
}
],
[
{
"node": "4260d45a-8705-483a-b17f-58211512ba59",
"type": "main",
"index": 0
}
]
]
},
"5d98fe9e-890c-4c9f-81c8-309cc23dc8af": {
"main": [
[
{
"node": "6578bcc5-b412-46bf-88d5-8b285372e9b9",
"type": "main",
"index": 0
}
]
]
},
"39127cf7-f627-4fca-b1b7-c51b3656947d": {
"main": [
[
{
"node": "0526a778-8d63-4dcc-9815-a002ffd70a7f",
"type": "main",
"index": 0
}
]
]
},
"c371c8db-e752-48fa-999d-4813aeb13f38": {
"ai_tool": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_tool",
"index": 0
}
]
]
},
"0461df33-2d2f-42e2-a0d3-288bd78275f1": {
"ai_memory": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_memory",
"index": 0
}
]
]
},
"8df9234a-85ad-45b4-bc17-ad64edaab08a": {
"main": [
[
{
"node": "0d18ebca-52f3-46ed-934c-44c9bad53dab",
"type": "main",
"index": 0
}
],
[
{
"node": "59d0fe0a-9e27-4755-ac23-f46fa6d2aa95",
"type": "main",
"index": 0
}
]
]
},
"bca3322d-bc2e-4932-a5f2-a2e9548a8aef": {
"main": [
[
{
"node": "621aa928-83c5-48a4-8488-67c58fa1aec8",
"type": "main",
"index": 0
}
]
]
},
"6578bcc5-b412-46bf-88d5-8b285372e9b9": {
"main": [
[
{
"node": "9a5d11a2-0fc8-48a1-8fa0-c2f53fb49b54",
"type": "main",
"index": 0
}
],
[
{
"node": "98abaa2b-ddbc-4c04-830e-d7112a6a57e2",
"type": "main",
"index": 0
}
]
]
},
"f076a729-8f40-4a3b-ad32-83837964c42c": {
"main": [
[
{
"node": "d71fbfb6-3e9b-427b-afe3-6fd77ff77ede",
"type": "main",
"index": 0
}
]
]
},
"d665823a-b40a-45a5-ac12-0a789c1b8ecd": {
"main": [
[
{
"node": "f076a729-8f40-4a3b-ad32-83837964c42c",
"type": "main",
"index": 0
}
]
]
},
"71d974a6-4f60-4573-be09-7cbb09502fa3": {
"main": [
[
{
"node": "25c7cbaf-7eb9-4e71-a488-b6d16242d324",
"type": "main",
"index": 0
}
]
]
},
"25c7cbaf-7eb9-4e71-a488-b6d16242d324": {
"main": [
[
{
"node": "6578bcc5-b412-46bf-88d5-8b285372e9b9",
"type": "main",
"index": 0
}
]
]
},
"6dc738b3-4ebe-4f5b-b8b3-decf9ce15e70": {
"ai_languageModel": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"afdbed61-346e-44a6-aa69-23a2b7ecf553": {
"ai_languageModel": [
[
{
"node": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"5948d577-4aea-4394-9b20-687f44efe5c8": {
"main": [
[
{
"node": "8df9234a-85ad-45b4-bc17-ad64edaab08a",
"type": "main",
"index": 0
}
]
]
},
"16ef0fa6-4259-43bf-b74f-3dc70d4b54e3": {
"ai_outputParser": [
[
{
"node": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"6fb7c3fa-7851-49cd-8d0b-01df74a80f35": {
"ai_tool": [
[
{
"node": "25c7cbaf-7eb9-4e71-a488-b6d16242d324",
"type": "ai_tool",
"index": 0
}
]
]
},
"fdcce6e6-f00f-4f84-ac6e-2e181452d3ac": {
"ai_tool": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_tool",
"index": 0
}
]
]
}
}
}

¿Cómo usar este flujo de trabajo?
Copie el código de configuración JSON de arriba, cree un nuevo flujo de trabajo en su instancia de n8n, seleccione "Importar desde JSON" y pegue la configuración. Después, ajuste las credenciales según sea necesario.
¿En qué escenarios es adecuado este flujo de trabajo?
Avanzado - Investigación de mercado, IA Multimodal
¿Es de pago?
Este flujo de trabajo es completamente gratuito; puede importarlo y usarlo directamente. Sin embargo, tenga en cuenta que los servicios de terceros utilizados en el flujo de trabajo (como la API de OpenAI) pueden requerir un pago por su cuenta.
Flujos de trabajo relacionados recomendados
Compartir este flujo de trabajo