Usar Bright Data, Gemini y Pinecone para crear un conjunto de datos vectorial listo para IA para LLMs
Este es un flujo de automatización (Building Blocks, AI) del dominio de automatización que contiene 21 nodos. Utiliza principalmente nodos como Set, HttpRequest, ManualTrigger, Agent y ChainLlm, combinando tecnología de inteligencia artificial para lograr una automatización inteligente. Usa Bright Data, Gemini y Pinecone para crear conjuntos de datos vectoriales listos para IA para LLM.
- Pueden requerirse credenciales de autenticación para la API de destino
- Clave de API de Google Gemini
- Clave de API de Pinecone
Nodos utilizados (21)
{
"id": "3Lih0LVosR8dZbla",
"meta": {
"instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
"templateCredsSetupCompleted": true
},
"name": "Create AI-Ready Vector Datasets for LLMs with Bright Data, Gemini & Pinecone",
"tags": [
{
"id": "Kujft2FOjmOVQAmJ",
"name": "Engineering",
"createdAt": "2025-04-09T01:31:00.558Z",
"updatedAt": "2025-04-09T01:31:00.558Z"
},
{
"id": "ZOwtAMLepQaGW76t",
"name": "Building Blocks",
"createdAt": "2025-04-13T15:23:40.462Z",
"updatedAt": "2025-04-13T15:23:40.462Z"
},
{
"id": "ddPkw7Hg5dZhQu2w",
"name": "AI",
"createdAt": "2025-04-13T05:38:08.053Z",
"updatedAt": "2025-04-13T05:38:08.053Z"
}
],
"nodes": [
{
"id": "0a468953-e348-420e-a6b3-c55fb20d3cbf",
"name": "Al hacer clic en 'Probar flujo de trabajo'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
200,
-710
],
"parameters": {},
"typeVersion": 1
},
{
"id": "3725e480-246f-4f32-b0a7-b946cacbe830",
"name": "Agente de IA",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
1236,
-60
],
"parameters": {
"text": "=Format the below search result\n\n{{ $json.output.search_result }}",
"options": {},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.8
},
{
"id": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
"name": "Almacén vectorial Pinecone",
"type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
"position": [
1628,
-10
],
"parameters": {
"mode": "insert",
"options": {},
"pineconeIndex": {
"__rl": true,
"mode": "list",
"value": "hacker-news",
"cachedResultName": "hacker-news"
}
},
"credentials": {
"pineconeApi": {
"id": "wdfRQ6NE8yjCDFhY",
"name": "PineconeApi account"
}
},
"typeVersion": 1.1
},
{
"id": "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5",
"name": "Embeddings Google Gemini",
"type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
"position": [
1612,
210
],
"parameters": {
"modelName": "models/text-embedding-004"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "e6443541-de71-4d26-ad58-d7c72868a190",
"name": "Cargador de datos predeterminado",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
1760,
220
],
"parameters": {
"options": {},
"jsonData": "={{ $('Extractor de información con formateador de datos').item.json.output.search_result }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "09ffc8cd-096f-47fe-937d-f8ab4fb41266",
"name": "Separador de texto recursivo por caracteres",
"type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
"position": [
1820,
410
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "90cc9aa4-0931-4c52-8734-e4e0de820205",
"name": "Modelo de chat Google Gemini 1",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
1240,
160
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "1090a4af-7e5d-446b-a537-3afe48cd4909",
"name": "Modelo de chat Google Gemini 2",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
948,
-340
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "324c530c-0a03-411e-acb0-d82e9dc635cf",
"name": "Modelo de chat Google Gemini",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
948,
160
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "3226a2d6-ade1-4d6a-95c5-0be4d787a947",
"name": "Analizador de salida estructurada",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
1400,
160
],
"parameters": {
"jsonSchemaExample": "[{\n\t\"id\": \"<string>\",\n\t\"title\": \"<string>\",\n \"summary\": \"<string>\",\n \"keywords\": [\"\"],\n \"topics\": [\"\"]\n}]"
},
"typeVersion": 1.2
},
{
"id": "a739a314-900a-4ef7-9cc2-1b65374e2e05",
"name": "Nota adhesiva",
"type": "n8n-nodes-base.stickyNote",
"position": [
40,
-360
],
"parameters": {
"width": 480,
"height": 220,
"content": "## Note\nPlease make sure to set the URL for web crawling. \n\nWeb-Unlocker Product is being utilized for performing the web scrapping. \n\nThis workflow is utilizing the Basic LLM Chain, Information Extraction with the AI Agents for formatting, extracting and persisting the response in PineCone Vector Database"
},
"typeVersion": 1
},
{
"id": "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c",
"name": "Establecer campos - URL y URL Webhook",
"type": "n8n-nodes-base.set",
"notes": "Set the URL which you are interested to scrap the data",
"position": [
420,
-710
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "1c132dd6-31e4-453b-a8cf-cad9845fe55b",
"name": "url",
"type": "string",
"value": "https://news.ycombinator.com?product=unlocker&method=api"
},
{
"id": "90f3272b-d13d-44e2-8b4c-0943648cfce9",
"name": "webhook_url",
"type": "string",
"value": "https://webhook.site/bc804ce5-4a45-4177-a68a-99c80e5c86e6"
}
]
}
},
"notesInFlow": true,
"typeVersion": 3.4
},
{
"id": "216a3261-a398-484c-9bf4-ca5966b829b6",
"name": "Make una solicitud web",
"type": "n8n-nodes-base.httpRequest",
"position": [
640,
-260
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "web_unlocker1"
},
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "format",
"value": "raw"
}
]
},
"genericAuthType": "httpHeaderAuth",
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "0c74e21c-3007-4297-b6ab-8ee17f4c6436",
"name": "Formateador de datos estructurados JSON",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
860,
-560
],
"parameters": {
"text": "=Format the below response and produce a textual data. Output the response as per the below JSON schema.\n\nHere's the input: {{ $json.data }}\nHere's the JSON schema: \n\n[{\n \"rank\": { \"type\": \"integer\" },\n \"title\": { \"type\": \"string\" },\n \"site\": { \"type\": \"string\" },\n \"points\": { \"type\": \"integer\" },\n \"user\": { \"type\": \"string\" },\n \"age\": { \"type\": \"string\" },\n \"comments\": { \"type\": \"string\" }\n}]",
"messages": {
"messageValues": [
{
"message": "You are an expert data formatter"
}
]
},
"promptType": "define"
},
"typeVersion": 1.6
},
{
"id": "012d4bb0-2b58-47cd-9cea-b4e0dced9082",
"name": "Webhook para datos estructurados",
"type": "n8n-nodes-base.httpRequest",
"position": [
1314,
-860
],
"parameters": {
"url": "={{ $json.webhook_url }}",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.text }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe",
"name": "Webhook para respuesta estructurada de agente de IA",
"type": "n8n-nodes-base.httpRequest",
"position": [
1750,
-660
],
"parameters": {
"url": "={{ $json.webhook_url }}",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.output }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "251b4251-255c-48c6-999b-02227fa2de9b",
"name": "Nota adhesiva 1",
"type": "n8n-nodes-base.stickyNote",
"position": [
800,
-620
],
"parameters": {
"width": 360,
"height": 420,
"content": "## AI Data Formatter\n"
},
"typeVersion": 1
},
{
"id": "f62463cd-6be3-4942-a636-de980a3154b4",
"name": "Nota adhesiva 2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1560,
-160
],
"parameters": {
"color": 4,
"width": 520,
"height": 720,
"content": "## Vector Database Persistence\n"
},
"typeVersion": 1
},
{
"id": "ad20cc91-766a-4a57-be54-6f0d09a784eb",
"name": "Nota adhesiva 3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1260,
-920
],
"parameters": {
"color": 3,
"width": 680,
"height": 440,
"content": "## Webhook Notification Handler\n"
},
"typeVersion": 1
},
{
"id": "37ab5c0f-d36e-4131-844d-20a22d3f2861",
"name": "Extractor de información con formateador de datos",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
860,
-60
],
"parameters": {
"text": "={{ $json.data }}",
"options": {
"systemPromptTemplate": "You are an expert HTML extractor. Your job is to analyze the search result and extract the content as a collection on items"
},
"attributes": {
"attributes": [
{
"name": "search_result",
"description": "Search Response"
}
]
}
},
"typeVersion": 1
},
{
"id": "e04e189a-8ba9-4ef4-9a49-fc13daf00828",
"name": "Nota adhesiva 4",
"type": "n8n-nodes-base.stickyNote",
"position": [
800,
-160
],
"parameters": {
"color": 5,
"width": 720,
"height": 720,
"content": "## Data Extraction/Formatting with the AI Agent\n"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "799fb406-600d-45a5-b926-24b8844f33a5",
"connections": {
"Agente de IA": {
"main": [
[
{
"node": "Almacén vectorial Pinecone",
"type": "main",
"index": 0
},
{
"node": "Webhook para respuesta estructurada de agente de IA",
"type": "main",
"index": 0
}
]
]
},
"Make una solicitud web": {
"main": [
[
{
"node": "Formateador de datos estructurados JSON",
"type": "main",
"index": 0
},
{
"node": "Extractor de información con formateador de datos",
"type": "main",
"index": 0
}
]
]
},
"Cargador de datos predeterminado": {
"ai_document": [
[
{
"node": "Almacén vectorial Pinecone",
"type": "ai_document",
"index": 0
}
]
]
},
"Almacén vectorial Pinecone": {
"ai_tool": [
[]
]
},
"Embeddings Google Gemini": {
"ai_embedding": [
[
{
"node": "Almacén vectorial Pinecone",
"type": "ai_embedding",
"index": 0
}
]
]
},
"Modelo de chat Google Gemini": {
"ai_languageModel": [
[
{
"node": "Extractor de información con formateador de datos",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Analizador de salida estructurada": {
"ai_outputParser": [
[
{
"node": "Agente de IA",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"Modelo de chat Google Gemini 1": {
"ai_languageModel": [
[
{
"node": "Agente de IA",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Modelo de chat Google Gemini 2": {
"ai_languageModel": [
[
{
"node": "Formateador de datos estructurados JSON",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Formateador de datos estructurados JSON": {
"main": [
[
{
"node": "Webhook para datos estructurados",
"type": "main",
"index": 0
}
]
]
},
"Establecer campos - URL y URL Webhook": {
"main": [
[
{
"node": "Make una solicitud web",
"type": "main",
"index": 0
},
{
"node": "Webhook para datos estructurados",
"type": "main",
"index": 0
},
{
"node": "Webhook para respuesta estructurada de agente de IA",
"type": "main",
"index": 0
}
]
]
},
"Separador de texto recursivo por caracteres": {
"ai_textSplitter": [
[
{
"node": "Cargador de datos predeterminado",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"Al hacer clic en 'Probar flujo de trabajo'": {
"main": [
[
{
"node": "Establecer campos - URL y URL Webhook",
"type": "main",
"index": 0
}
]
]
},
"Extractor de información con formateador de datos": {
"main": [
[
{
"node": "Agente de IA",
"type": "main",
"index": 0
}
]
]
}
}
}
¿Cómo usar este flujo de trabajo?
Copie el código de configuración JSON de arriba, cree un nuevo flujo de trabajo en su instancia de n8n y seleccione "Importar desde JSON", pegue la configuración y luego modifique la configuración de credenciales según sea necesario.
¿En qué escenarios es adecuado este flujo de trabajo?
Avanzado - Bloques de construcción, Inteligencia Artificial
¿Es de pago?
Este flujo de trabajo es completamente gratuito; puede importarlo y usarlo directamente. Sin embargo, tenga en cuenta que los servicios de terceros utilizados en el flujo de trabajo (como Bright Data, la API de Google Gemini y Pinecone) pueden requerir un pago por su cuenta.
Flujos de trabajo relacionados recomendados
Ranjan Dailata
@ranjancse
Compartir este flujo de trabajo