Extracción estructurada de datos por lotes con el web scraper de Bright Data
Este es un flujo de automatización (Engineering, Product) del dominio de automatización que contiene 16 nodos. Utiliza principalmente nodos como If, Set, Wait, Function y Aggregate. Rastreo web por lotes asíncrono con Bright Data y notificaciones de Webhook.
- Pueden requerirse credenciales de autenticación para la API de destino
Nodos utilizados (16)
Categoría
{
"id": "OjwmaLrXhW4pO5ph",
"meta": {
"instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40"
},
"name": "Structured Bulk Data Extract with Bright Data Web Scraper",
"tags": [
{
"id": "Kujft2FOjmOVQAmJ",
"name": "Engineering",
"createdAt": "2025-04-09T01:31:00.558Z",
"updatedAt": "2025-04-09T01:31:00.558Z"
},
{
"id": "ZOwtAMLepQaGW76t",
"name": "Building Blocks",
"createdAt": "2025-04-13T15:23:40.462Z",
"updatedAt": "2025-04-13T15:23:40.462Z"
}
],
"nodes": [
{
"id": "1bdca5ae-1e56-4cf2-a8dc-e135a6a2dfec",
"name": "Al hacer clic en 'Probar flujo de trabajo'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-900,
-395
],
"parameters": {},
"typeVersion": 1
},
{
"id": "533968cd-1329-4a86-8875-478600ed82b7",
"name": "Si",
"type": "n8n-nodes-base.if",
"position": [
200,
-470
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "6a7e5360-4cb5-4806-892e-5c85037fa71c",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.status }}",
"rightValue": "ready"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "83991fdf-0402-4de3-bbb5-7050e3e9fb62",
"name": "Establecer ID de instantánea",
"type": "n8n-nodes-base.set",
"position": [
-240,
-395
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "2c3369c6-9206-45d7-9349-f577baeaf189",
"name": "snapshot_id",
"type": "string",
"value": "={{ $json.snapshot_id }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "408a36af-decb-49b3-a95e-a2df0b6eea5f",
"name": "Descargar instantánea",
"type": "n8n-nodes-base.httpRequest",
"position": [
640,
-520
],
"parameters": {
"url": "=https://api.brightdata.com/datasets/v3/snapshot/{{ $json.snapshot_id }}",
"options": {
"timeout": 10000
},
"sendQuery": true,
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"queryParameters": {
"parameters": [
{
"name": "format",
"value": "json"
}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "9d6cd882-c287-46ca-bc1e-df6b995fc422",
"name": "Esperar",
"type": "n8n-nodes-base.wait",
"position": [
420,
-295
],
"webhookId": "631cd5de-36b3-4264-88ae-45b30e2c2ccc",
"parameters": {
"amount": 30
},
"typeVersion": 1.1
},
{
"id": "c9cf847a-6399-4c93-a901-30f1c0e7408a",
"name": "Verificar errores",
"type": "n8n-nodes-base.if",
"position": [
420,
-520
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "b267071c-7102-407b-a98d-f613bcb1a106",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.errors.toString() }}",
"rightValue": "0"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "b648614e-c33e-4818-8348-e95df56928c7",
"name": "Verificar estado de la instantánea",
"type": "n8n-nodes-base.httpRequest",
"position": [
-20,
-395
],
"parameters": {
"url": "=https://api.brightdata.com/datasets/v3/progress/{{ $json.snapshot_id }}",
"options": {},
"sendHeaders": true,
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "408a1584-666f-471e-bfcd-c4d857319688",
"name": "Iniciar una notificación Webhook",
"type": "n8n-nodes-base.httpRequest",
"position": [
1080,
-520
],
"parameters": {
"url": "https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.data[0] }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "6548a794-a4fd-4050-b07d-bc7ca4517882",
"name": "Agregar respuesta JSON",
"type": "n8n-nodes-base.aggregate",
"position": [
860,
-520
],
"parameters": {
"options": {},
"aggregate": "aggregateAllItemData"
},
"typeVersion": 1
},
{
"id": "c84e195c-edd2-4f59-8986-516d116b7352",
"name": "Establecer ID del conjunto de datos, URL de solicitud",
"type": "n8n-nodes-base.set",
"position": [
-680,
-400
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "c16061c8-c829-4bd3-b335-e79c605665f2",
"name": "dataset_id",
"type": "string",
"value": "gd_l7q7dkf244hwjntr0"
},
{
"id": "a4594c55-e39e-4a9e-80d6-d39370001e20",
"name": "request",
"type": "string",
"value": "[{ \"url\": \"https://www.amazon.com/Quencher-FlowState-Stainless-Insulated-Smoothie/dp/B0CRMZHDG8\" }]"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "ceae108e-ed78-40c5-8e58-7013591ccaad",
"name": "Nota adhesiva",
"type": "n8n-nodes-base.stickyNote",
"position": [
-900,
-700
],
"parameters": {
"width": 520,
"height": 280,
"content": "## Note\n\nDeals with the Amazon web scraping by utilizing Bright Data Web Scraper Product.\n\n\n**Please make sure to set the Bright Data \n -> Dataset Id, Request URL and update the Webhook Notification URL**\n\nRefer \n- https://brightdata.com/products/web-scraper/ai\n- https://brightdata.com/products/web-scraper"
},
"typeVersion": 1
},
{
"id": "1f55cffa-abd9-437b-bc9d-3fe0d8b02454",
"name": "Nota adhesiva1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-120,
-600
],
"parameters": {
"color": 5,
"width": 720,
"height": 500,
"content": "## Wait until the Snapshot is ready"
},
"typeVersion": 1
},
{
"id": "d8ba0f62-80a9-4e66-b70c-086ee5992df6",
"name": "Nota adhesiva2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-900,
-220
],
"parameters": {
"color": 4,
"width": 660,
"content": "## Who can benefit?\nData analysts, scientists, engineers, and developers seeking efficient methods to collect and analyze web data for AI, ML, big data applications, and more will find Scraper APIs particularly beneficial."
},
"typeVersion": 1
},
{
"id": "7fdffafd-f256-4760-b001-a42b5198dbad",
"name": "Crear datos binarios",
"type": "n8n-nodes-base.function",
"position": [
1100,
-720
],
"parameters": {
"functionCode": "items[0].binary = {\n data: {\n data: Buffer.from(JSON.stringify(items[0].json, null, 2)).toString('base64')\n }\n};\nreturn items;"
},
"typeVersion": 1
},
{
"id": "934ab31a-cfb9-4e97-8d86-92cd95dd219c",
"name": "Escribir el archivo en disco",
"type": "n8n-nodes-base.readWriteFile",
"position": [
1320,
-720
],
"parameters": {
"options": {},
"fileName": "d:\\bulk_data.json",
"operation": "write"
},
"typeVersion": 1
},
{
"id": "1130523a-b598-425e-acf1-417ae8699f66",
"name": "Solicitud HTTP a la URL especificada",
"type": "n8n-nodes-base.httpRequest",
"position": [
-460,
-395
],
"parameters": {
"url": "https://api.brightdata.com/datasets/v3/trigger",
"method": "POST",
"options": {},
"jsonBody": "={{ $json.request }}",
"sendBody": true,
"sendQuery": true,
"sendHeaders": true,
"specifyBody": "json",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"queryParameters": {
"parameters": [
{
"name": "dataset_id",
"value": "={{ $json.dataset_id }}"
},
{
"name": "format",
"value": "json"
},
{
"name": "uncompressed_webhook",
"value": "true"
}
]
},
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "8fb2eb85-ffd6-4632-9668-00f29bc91c34",
"connections": {
"533968cd-1329-4a86-8875-478600ed82b7": {
"main": [
[
{
"node": "c9cf847a-6399-4c93-a901-30f1c0e7408a",
"type": "main",
"index": 0
}
],
[
{
"node": "9d6cd882-c287-46ca-bc1e-df6b995fc422",
"type": "main",
"index": 0
}
]
]
},
"9d6cd882-c287-46ca-bc1e-df6b995fc422": {
"main": [
[
{
"node": "b648614e-c33e-4818-8348-e95df56928c7",
"type": "main",
"index": 0
}
]
]
},
"83991fdf-0402-4de3-bbb5-7050e3e9fb62": {
"main": [
[
{
"node": "b648614e-c33e-4818-8348-e95df56928c7",
"type": "main",
"index": 0
}
]
]
},
"408a36af-decb-49b3-a95e-a2df0b6eea5f": {
"main": [
[
{
"node": "6548a794-a4fd-4050-b07d-bc7ca4517882",
"type": "main",
"index": 0
}
]
]
},
"c9cf847a-6399-4c93-a901-30f1c0e7408a": {
"main": [
[
{
"node": "408a36af-decb-49b3-a95e-a2df0b6eea5f",
"type": "main",
"index": 0
}
]
]
},
"7fdffafd-f256-4760-b001-a42b5198dbad": {
"main": [
[
{
"node": "934ab31a-cfb9-4e97-8d86-92cd95dd219c",
"type": "main",
"index": 0
}
]
]
},
"b648614e-c33e-4818-8348-e95df56928c7": {
"main": [
[
{
"node": "533968cd-1329-4a86-8875-478600ed82b7",
"type": "main",
"index": 0
}
]
]
},
"6548a794-a4fd-4050-b07d-bc7ca4517882": {
"main": [
[
{
"node": "408a1584-666f-471e-bfcd-c4d857319688",
"type": "main",
"index": 0
},
{
"node": "7fdffafd-f256-4760-b001-a42b5198dbad",
"type": "main",
"index": 0
}
]
]
},
"c84e195c-edd2-4f59-8986-516d116b7352": {
"main": [
[
{
"node": "1130523a-b598-425e-acf1-417ae8699f66",
"type": "main",
"index": 0
}
]
]
},
"1130523a-b598-425e-acf1-417ae8699f66": {
"main": [
[
{
"node": "83991fdf-0402-4de3-bbb5-7050e3e9fb62",
"type": "main",
"index": 0
}
]
]
},
"1bdca5ae-1e56-4cf2-a8dc-e135a6a2dfec": {
"main": [
[
{
"node": "c84e195c-edd2-4f59-8986-516d116b7352",
"type": "main",
"index": 0
}
]
]
}
}
}
¿Cómo usar este flujo de trabajo?
Copie el código de configuración JSON de arriba, cree un nuevo flujo de trabajo en su instancia de n8n y seleccione "Importar desde JSON", pegue la configuración y luego modifique la configuración de credenciales según sea necesario.
¿En qué escenarios es adecuado este flujo de trabajo?
Avanzado - Ingeniería, Producto
¿Es de pago?
Este flujo de trabajo es completamente gratuito; puede importarlo y usarlo directamente. Sin embargo, tenga en cuenta que los servicios de terceros utilizados en el flujo de trabajo (como la API de Bright Data) pueden requerir un pago por su cuenta.
Flujos de trabajo relacionados recomendados
Ranjan Dailata
@ranjancse
Compartir este flujo de trabajo