Créer un ensemble de données vectoriel prêt pour l'IA pour les LLM à l'aide de Bright Data, Gemini et Pinecone
Ceci est un workflow d'automatisation du domaine Building Blocks, AI, contenant 21 nœuds. Il utilise principalement des nœuds comme Set, HttpRequest, ManualTrigger, Agent, ChainLlm, combinant la technologie d'intelligence artificielle pour une automatisation intelligente. Créer des jeux de données vectoriels prêts pour l'IA pour les LLM avec Bright Data, Gemini et Pinecone.
- Peut nécessiter les informations d'authentification de l'API cible
- Clé API Google Gemini
- Clé API Pinecone
Nœuds utilisés (21)
{
"id": "3Lih0LVosR8dZbla",
"meta": {
"instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
"templateCredsSetupCompleted": true
},
"name": "Create AI-Ready Vector Datasets for LLMs with Bright Data, Gemini & Pinecone",
"tags": [
{
"id": "Kujft2FOjmOVQAmJ",
"name": "Engineering",
"createdAt": "2025-04-09T01:31:00.558Z",
"updatedAt": "2025-04-09T01:31:00.558Z"
},
{
"id": "ZOwtAMLepQaGW76t",
"name": "Building Blocks",
"createdAt": "2025-04-13T15:23:40.462Z",
"updatedAt": "2025-04-13T15:23:40.462Z"
},
{
"id": "ddPkw7Hg5dZhQu2w",
"name": "AI",
"createdAt": "2025-04-13T05:38:08.053Z",
"updatedAt": "2025-04-13T05:38:08.053Z"
}
],
"nodes": [
{
"id": "0a468953-e348-420e-a6b3-c55fb20d3cbf",
"name": "Lors du clic sur 'Tester le workflow'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
200,
-710
],
"parameters": {},
"typeVersion": 1
},
{
"id": "3725e480-246f-4f32-b0a7-b946cacbe830",
"name": "Agent IA",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
1236,
-60
],
"parameters": {
"text": "=Format the below search result\n\n{{ $json.output.search_result }}",
"options": {},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.8
},
{
"id": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
"name": "Magasin vectoriel Pinecone",
"type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
"position": [
1628,
-10
],
"parameters": {
"mode": "insert",
"options": {},
"pineconeIndex": {
"__rl": true,
"mode": "list",
"value": "hacker-news",
"cachedResultName": "hacker-news"
}
},
"credentials": {
"pineconeApi": {
"id": "wdfRQ6NE8yjCDFhY",
"name": "PineconeApi account"
}
},
"typeVersion": 1.1
},
{
"id": "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5",
"name": "Embeddings Google Gemini",
"type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
"position": [
1612,
210
],
"parameters": {
"modelName": "models/text-embedding-004"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "e6443541-de71-4d26-ad58-d7c72868a190",
"name": "Chargeur de données par défaut",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
1760,
220
],
"parameters": {
"options": {},
"jsonData": "={{ $(\"Extracteur d'informations avec formateur de données\").item.json.output.search_result }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "09ffc8cd-096f-47fe-937d-f8ab4fb41266",
"name": "Séparateur de texte récursif",
"type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
"position": [
1820,
410
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "90cc9aa4-0931-4c52-8734-e4e0de820205",
"name": "Modèle de chat Google Gemini 1",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
1240,
160
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "1090a4af-7e5d-446b-a537-3afe48cd4909",
"name": "Modèle de chat Google Gemini 2",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
948,
-340
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "324c530c-0a03-411e-acb0-d82e9dc635cf",
"name": "Modèle de chat Google Gemini",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
948,
160
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "3226a2d6-ade1-4d6a-95c5-0be4d787a947",
"name": "Analyseur de sortie structurée",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
1400,
160
],
"parameters": {
"jsonSchemaExample": "[{\n\t\"id\": \"<string>\",\n\t\"title\": \"<string>\",\n \"summary\": \"<string>\",\n \"keywords\": [\"\"],\n \"topics\": [\"\"]\n}]"
},
"typeVersion": 1.2
},
{
"id": "a739a314-900a-4ef7-9cc2-1b65374e2e05",
"name": "Note autocollante",
"type": "n8n-nodes-base.stickyNote",
"position": [
40,
-360
],
"parameters": {
"width": 480,
"height": 220,
"content": "## Note\nPlease make sure to set the URL for web crawling. \n\nWeb-Unlocker Product is being utilized for performing the web scrapping. \n\nThis workflow is utilizing the Basic LLM Chain, Information Extraction with the AI Agents for formatting, extracting and persisting the response in PineCone Vector Database"
},
"typeVersion": 1
},
{
"id": "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c",
"name": "Définir les champs - URL et Webhook URL",
"type": "n8n-nodes-base.set",
"notes": "Set the URL which you are interested to scrap the data",
"position": [
420,
-710
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "1c132dd6-31e4-453b-a8cf-cad9845fe55b",
"name": "url",
"type": "string",
"value": "https://news.ycombinator.com?product=unlocker&method=api"
},
{
"id": "90f3272b-d13d-44e2-8b4c-0943648cfce9",
"name": "webhook_url",
"type": "string",
"value": "https://webhook.site/bc804ce5-4a45-4177-a68a-99c80e5c86e6"
}
]
}
},
"notesInFlow": true,
"typeVersion": 3.4
},
{
"id": "216a3261-a398-484c-9bf4-ca5966b829b6",
"name": "Make une requête web",
"type": "n8n-nodes-base.httpRequest",
"position": [
640,
-260
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "web_unlocker1"
},
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "format",
"value": "raw"
}
]
},
"genericAuthType": "httpHeaderAuth",
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "0c74e21c-3007-4297-b6ab-8ee17f4c6436",
"name": "Formateur de données structurées JSON",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
860,
-560
],
"parameters": {
"text": "=Format the below response and produce a textual data. Output the response as per the below JSON schema.\n\nHere's the input: {{ $json.data }}\nHere's the JSON schema: \n\n[{\n \"rank\": { \"type\": \"integer\" },\n \"title\": { \"type\": \"string\" },\n \"site\": { \"type\": \"string\" },\n \"points\": { \"type\": \"integer\" },\n \"user\": { \"type\": \"string\" },\n \"age\": { \"type\": \"string\" },\n \"comments\": { \"type\": \"string\" }\n}]",
"messages": {
"messageValues": [
{
"message": "You are an expert data formatter"
}
]
},
"promptType": "define"
},
"typeVersion": 1.6
},
{
"id": "012d4bb0-2b58-47cd-9cea-b4e0dced9082",
"name": "Webhook pour données structurées",
"type": "n8n-nodes-base.httpRequest",
"position": [
1314,
-860
],
"parameters": {
"url": "={{ $json.webhook_url }}",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.text }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe",
"name": "Webhook pour réponse d'agent IA structurée",
"type": "n8n-nodes-base.httpRequest",
"position": [
1750,
-660
],
"parameters": {
"url": "={{ $json.webhook_url }}",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.output }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "251b4251-255c-48c6-999b-02227fa2de9b",
"name": "Note autocollante 1",
"type": "n8n-nodes-base.stickyNote",
"position": [
800,
-620
],
"parameters": {
"width": 360,
"height": 420,
"content": "## AI Data Formatter\n"
},
"typeVersion": 1
},
{
"id": "f62463cd-6be3-4942-a636-de980a3154b4",
"name": "Note autocollante 2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1560,
-160
],
"parameters": {
"color": 4,
"width": 520,
"height": 720,
"content": "## Vector Database Persistence\n"
},
"typeVersion": 1
},
{
"id": "ad20cc91-766a-4a57-be54-6f0d09a784eb",
"name": "Note autocollante 3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1260,
-920
],
"parameters": {
"color": 3,
"width": 680,
"height": 440,
"content": "## Webhook Notification Handler\n"
},
"typeVersion": 1
},
{
"id": "37ab5c0f-d36e-4131-844d-20a22d3f2861",
"name": "Extracteur d'informations avec formateur de données",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
860,
-60
],
"parameters": {
"text": "={{ $json.data }}",
"options": {
"systemPromptTemplate": "You are an expert HTML extractor. Your job is to analyze the search result and extract the content as a collection on items"
},
"attributes": {
"attributes": [
{
"name": "search_result",
"description": "Search Response"
}
]
}
},
"typeVersion": 1
},
{
"id": "e04e189a-8ba9-4ef4-9a49-fc13daf00828",
"name": "Note autocollante 4",
"type": "n8n-nodes-base.stickyNote",
"position": [
800,
-160
],
"parameters": {
"color": 5,
"width": 720,
"height": 720,
"content": "## Data Extraction/Formatting with the AI Agent\n"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "799fb406-600d-45a5-b926-24b8844f33a5",
"connections": {
"Agent IA": {
"main": [
[
{
"node": "Magasin vectoriel Pinecone",
"type": "main",
"index": 0
},
{
"node": "Webhook pour réponse d'agent IA structurée",
"type": "main",
"index": 0
}
]
]
},
"Make une requête web": {
"main": [
[
{
"node": "Formateur de données structurées JSON",
"type": "main",
"index": 0
},
{
"node": "Extracteur d'informations avec formateur de données",
"type": "main",
"index": 0
}
]
]
},
"Chargeur de données par défaut": {
"ai_document": [
[
{
"node": "Magasin vectoriel Pinecone",
"type": "ai_document",
"index": 0
}
]
]
},
"Magasin vectoriel Pinecone": {
"ai_tool": [
[]
]
},
"Embeddings Google Gemini": {
"ai_embedding": [
[
{
"node": "Magasin vectoriel Pinecone",
"type": "ai_embedding",
"index": 0
}
]
]
},
"Modèle de chat Google Gemini": {
"ai_languageModel": [
[
{
"node": "Extracteur d'informations avec formateur de données",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Analyseur de sortie structurée": {
"ai_outputParser": [
[
{
"node": "Agent IA",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"Modèle de chat Google Gemini 1": {
"ai_languageModel": [
[
{
"node": "Agent IA",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Modèle de chat Google Gemini 2": {
"ai_languageModel": [
[
{
"node": "Formateur de données structurées JSON",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Formateur de données structurées JSON": {
"main": [
[
{
"node": "Webhook pour données structurées",
"type": "main",
"index": 0
}
]
]
},
"Définir les champs - URL et Webhook URL": {
"main": [
[
{
"node": "Make une requête web",
"type": "main",
"index": 0
},
{
"node": "Webhook pour données structurées",
"type": "main",
"index": 0
},
{
"node": "Webhook pour réponse d'agent IA structurée",
"type": "main",
"index": 0
}
]
]
},
"Séparateur de texte récursif": {
"ai_textSplitter": [
[
{
"node": "Chargeur de données par défaut",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"Lors du clic sur 'Tester le workflow'": {
"main": [
[
{
"node": "Définir les champs - URL et Webhook URL",
"type": "main",
"index": 0
}
]
]
},
"Extracteur d'informations avec formateur de données": {
"main": [
[
{
"node": "Agent IA",
"type": "main",
"index": 0
}
]
]
}
}
}
Comment utiliser ce workflow ?
Copiez le code de configuration JSON ci-dessus, créez un nouveau workflow dans votre instance n8n et sélectionnez "Importer depuis le JSON", collez la configuration et modifiez les paramètres d'authentification selon vos besoins.
Dans quels scénarios ce workflow est-il adapté ?
Avancé - Blocs de construction, Intelligence Artificielle
Est-ce payant ?
Ce workflow est entièrement gratuit et peut être utilisé directement. Veuillez noter que les services tiers utilisés dans le workflow (comme Bright Data, l'API Google Gemini ou Pinecone) peuvent nécessiter un paiement de votre part.
Workflows recommandés
Ranjan Dailata
@ranjancse
Partager ce workflow