Data Extraction of Upcoming Events with Bright Data
This is an AI-assisted automation workflow with 11 nodes, built mainly from Code, HTML, HTTP Request, Google Sheets, and Schedule Trigger nodes. It performs fully automated event discovery with Bright Data and n8n: on a schedule, it scrapes upcoming event listings and saves them to a Google Sheet.
- Target API credentials may be required (here: a Bright Data API key)
- Google Sheets API credentials
Nodes used (11)
{
"id": "NY5tq9f8iYwpvPC6",
"meta": {
"instanceId": "60046904b104f0f72b2629a9d88fe9f676be4035769f1f08dad1dd38a76b9480"
},
"name": "Scrape_Upcoming_Events_using_bright_data",
"tags": [],
"nodes": [
{
"id": "5012cf3e-7fa6-4971-906b-760baeb51396",
"name": "Trigger - Weekly Run",
"type": "n8n-nodes-base.scheduleTrigger",
"position": [
-440,
1160
],
"parameters": {
"rule": {
"interval": [
{
"field": "weeks",
"triggerAtDay": [
1
],
"triggerAtHour": 8
}
]
}
},
"typeVersion": 1.2
},
{
"id": "8962483b-d095-4ade-bf9c-c3bfa5fe1831",
"name": "Scrape Event Website with Bright Data",
"type": "n8n-nodes-base.httpRequest",
"position": [
-200,
1160
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "n8n_unblocker"
},
{
"name": "url",
"value": "https://www.eventbrite.com/d/online/technology--events/"
},
{
"name": "country",
"value": "us"
},
{
"name": "format",
"value": "raw"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "Bearer API_KEY"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "12d62c8b-047d-4890-85f7-c9a8097fcc2d",
"name": "Parse HTML - Extract Event Cards",
"type": "n8n-nodes-base.html",
"position": [
60,
1160
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "Title",
"cssSelector": "h3",
"returnArray": true
},
{
"key": "Date and Time",
"cssSelector": "div.Stack_root__1ksk7 > p:nth-of-type(1)",
"returnArray": true
}
]
}
},
"typeVersion": 1.2
},
{
"id": "df0d0e23-1afe-4c74-b408-e5b3123c13a4",
"name": "Format Event Data",
"type": "n8n-nodes-base.code",
"position": [
280,
1160
],
"parameters": {
"jsCode": "// Get the input data from the previous node\nconst inputData = items[0].json;\n\n// Access the arrays from the input object\nconst titles = inputData.Title;\nconst dates = inputData['Date and Time'];\n\n// Use a Set to track titles we've already processed to remove duplicates\nconst seenTitles = new Set();\nconst cleanedEvents = [];\n\n// Get the number of actual events (the input has duplicates)\nconst eventCount = dates.length;\n\nfor (let i = 0; i < eventCount; i++) {\n const title = titles[i];\n const date = dates[i];\n\n // 1. Skip if the title is empty or we've already processed this event\n if (!title || seenTitles.has(title)) {\n continue;\n }\n\n // 2. Filter out irrelevant \"Trends\" items that start with a number (e.g., \"1. Tickets\")\n if (/^\\d+\\.\\s/.test(title)) {\n continue;\n }\n \n // 3. Add the unique event to our results (without the URL)\n cleanedEvents.push({\n \"Title\": title,\n \"Date and Time\": date\n });\n\n // 4. Mark this title as seen to avoid adding it again\n seenTitles.add(title);\n}\n\n// Return the newly structured and cleaned array of event objects\nreturn cleanedEvents;"
},
"typeVersion": 2
},
{
"id": "c58d917f-d756-45ad-a38a-b52254250154",
"name": "Save to Google Sheets",
"type": "n8n-nodes-base.googleSheets",
"position": [
580,
1160
],
"parameters": {
"columns": {
"value": {
"Title": "={{ $json.Title }}",
"Date & Time": "={{ $json['Date and Time'] }}"
},
"schema": [
{
"id": "Title",
"type": "string",
"display": true,
"required": false,
"displayName": "Title",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Date & Time",
"type": "string",
"display": true,
"required": false,
"displayName": "Date & Time",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/157HRnzYP9IShr4jTQH7_y3r35cq2NVu0hv7kAW9kqn0/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "157HRnzYP9IShr4jTQH7_y3r35cq2NVu0hv7kAW9kqn0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/157HRnzYP9IShr4jTQH7_y3r35cq2NVu0hv7kAW9kqn0/edit?usp=drivesdk",
"cachedResultName": "Events"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "r2mDaisH6e9VkwHl",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "ed7d1601-99b4-4191-b75c-cb7060a42bc5",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-480,
460
],
"parameters": {
"color": 5,
"width": 440,
"height": 880,
"content": "## 🧩 SECTION 1: 🔄 **Trigger + Scrape Website Content**\n\n### 🧭 Purpose: Kickstart the automation & fetch data from the web\n\n---\n\n| 🔧 **Nodes Involved** | 🔹 `Schedule Trigger`<br>🔹 `HTTP Request (Bright Data Web Unlocker)` |\n| --------------------- | ----------------------------------------------------------------------- |\n| 📌 **Goal** | Automatically fetch webinar details from Eventbrite (or any event site) |\n| 🛠️ **How it works** | |\n\n1. **⏰ `Schedule Trigger`**\n This node runs the entire workflow at a preset time (e.g., daily at 8 AM). It ensures your automation runs hands-free — no manual clicking.\n\n2. **🌐 `HTTP Request`**\n This node uses **Bright Data's Web Unlocker** to bypass bot detection and scrape real event pages from protected websites like Eventbrite.\n The **POST** request is sent to `https://api.brightdata.com/request`, which returns **raw HTML** of the event listings page.\n\n💡 *Why Bright Data?*\nWebsites like Eventbrite use anti-bot systems. Bright Data safely navigates that with proxies + human-like browsing.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "24726d95-f802-45e8-868d-575ce6453580",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
0
],
"parameters": {
"color": 6,
"width": 420,
"height": 1340,
"content": "## 🧩 SECTION 2: 🔍 **Extract & Structure Event Data**\n\n### 🧠 Purpose: Turn messy HTML into clean, usable event info\n\n---\n\n| 🔧 **Nodes Involved** | 🔹 `HTML Extract`<br>🔹 `Code Node` |\n| --------------------- | -------------------------------------------------------- |\n| 📌 **Goal** | Isolate event titles, times, and links from the raw HTML |\n| 🛠️ **How it works** | |\n\n1. **🧾 `HTML Extract`**\n This node lets you select elements from the HTML using **CSS selectors**, just like a web developer would.\n You extract:\n\n * `.eds-event-card-content__title` → **Title**\n * `.eds-event-card-content__sub-title` → **Date & Time**\n * `.eds-event-card-content__action-link[href]` → **Event URL**\n\n2. **🧮 `Code Node`**\n Here, we **loop through the extracted data** and format it into clean JSON objects:\n\n ```js\n return items[0].json.titles.map((title, i) => {\n return {\n json: {\n title,\n date: items[0].json.dates[i],\n link: items[0].json.links[i]\n }\n };\n });\n ```\n\n Result: You now have a list of clean, structured webinar entries.\n\n💡 *Why use a code node?*\nIt transforms raw scraped chunks into usable pieces ready for storage or integration.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "834b6789-b284-49e0-86ba-a9d803fb52aa",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
460,
540
],
"parameters": {
"color": 3,
"width": 340,
"height": 800,
"content": "## 🧩 SECTION 3: 📄 **Store Events in Google Sheets**\n\n### 💾 Purpose: Save your scraped webinars into a spreadsheet\n\n---\n\n| 🔧 **Node Involved** | 🔹 `Google Sheets: Append` |\n| -------------------- | ----------------------------------------------- |\n| 📌 **Goal** | Automatically log each event into a spreadsheet |\n| 🛠️ **How it works** | |\n\n* This node appends each structured event entry to a connected **Google Sheet**.\n* Each row includes:\n 📌 `Event Title` | 📅 `Date & Time` | 🔗 `URL`\n* You can filter, sort, or share this sheet as a **database of upcoming webinars**.\n\n💡 *Why Google Sheets?*\nIt’s universal, easy to search/filter, and sharable with a team. No special tool or database needed.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "48ecec93-c46c-4398-ab14-bcfe617b0728",
"name": "Sticky Note9",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2140,
480
],
"parameters": {
"color": 4,
"width": 1300,
"height": 320,
"content": "=======================================\n WORKFLOW ASSISTANCE\n=======================================\nFor any questions or support, please contact:\n Yaron@nofluff.online\n\nExplore more tips and tutorials here:\n - YouTube: https://www.youtube.com/@YaronBeen/videos\n - LinkedIn: https://www.linkedin.com/in/yaronbeen/\n=======================================\n"
},
"typeVersion": 1
},
{
"id": "4bceecb8-56d2-47d7-8833-92f29af8f2fa",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2140,
820
],
"parameters": {
"color": 4,
"width": 1289,
"height": 2298,
"content": "### 🎓 **Auto-Webinar Harvester**\n\n#### *Scrape Upcoming Events & Save to Google Sheets Automatically*\n\n> **Tagline:** “Your always-on assistant for discovering industry webinars — no clicks required.”\n> 🧠💻⚙️📅\n\n---\n\n## 🧩 SECTION 1: 🔄 **Trigger + Scrape Website Content**\n\n### 🧭 Purpose: Kickstart the automation & fetch data from the web\n\n---\n\n| 🔧 **Nodes Involved** | 🔹 `Schedule Trigger`<br>🔹 `HTTP Request (Bright Data Web Unlocker)` |\n| --------------------- | ----------------------------------------------------------------------- |\n| 📌 **Goal** | Automatically fetch webinar details from Eventbrite (or any event site) |\n| 🛠️ **How it works** | |\n\n1. **⏰ `Schedule Trigger`**\n This node runs the entire workflow at a preset time (e.g., daily at 8 AM). It ensures your automation runs hands-free — no manual clicking.\n\n2. **🌐 `HTTP Request`**\n This node uses **Bright Data's Web Unlocker** to bypass bot detection and scrape real event pages from protected websites like Eventbrite.\n The **POST** request is sent to `https://api.brightdata.com/request`, which returns **raw HTML** of the event listings page.\n\n💡 *Why Bright Data?*\nWebsites like Eventbrite use anti-bot systems. Bright Data safely navigates that with proxies + human-like browsing.\n\n---\n\n## 🧩 SECTION 2: 🔍 **Extract & Structure Event Data**\n\n### 🧠 Purpose: Turn messy HTML into clean, usable event info\n\n---\n\n| 🔧 **Nodes Involved** | 🔹 `HTML Extract`<br>🔹 `Code Node` |\n| --------------------- | -------------------------------------------------------- |\n| 📌 **Goal** | Isolate event titles, times, and links from the raw HTML |\n| 🛠️ **How it works** | |\n\n1. 
**🧾 `HTML Extract`**\n This node lets you select elements from the HTML using **CSS selectors**, just like a web developer would.\n You extract:\n\n * `.eds-event-card-content__title` → **Title**\n * `.eds-event-card-content__sub-title` → **Date & Time**\n * `.eds-event-card-content__action-link[href]` → **Event URL**\n\n2. **🧮 `Code Node`**\n Here, we **loop through the extracted data** and format it into clean JSON objects:\n\n ```js\n return items[0].json.titles.map((title, i) => {\n return {\n json: {\n title,\n date: items[0].json.dates[i],\n link: items[0].json.links[i]\n }\n };\n });\n ```\n\n Result: You now have a list of clean, structured webinar entries.\n\n💡 *Why use a code node?*\nIt transforms raw scraped chunks into usable pieces ready for storage or integration.\n\n---\n\n## 🧩 SECTION 3: 📄 **Store Events in Google Sheets**\n\n### 💾 Purpose: Save your scraped webinars into a spreadsheet\n\n---\n\n| 🔧 **Node Involved** | 🔹 `Google Sheets: Append` |\n| -------------------- | ----------------------------------------------- |\n| 📌 **Goal** | Automatically log each event into a spreadsheet |\n| 🛠️ **How it works** | |\n\n* This node appends each structured event entry to a connected **Google Sheet**.\n* Each row includes:\n 📌 `Event Title` | 📅 `Date & Time` | 🔗 `URL`\n* You can filter, sort, or share this sheet as a **database of upcoming webinars**.\n\n💡 *Why Google Sheets?*\nIt’s universal, easy to search/filter, and sharable with a team. 
No special tool or database needed.\n\n---\n\n## 🎯 Why This Workflow Is Powerful (Even for Beginners)\n\n| 🚀 Benefit | ✅ How It Helps You |\n| --------------------------------- | ----------------------------------------------------------------------- |\n| Zero manual scraping | Just set a schedule and forget — everything updates on its own |\n| Works with hard-to-scrape sites | Bright Data + Web Unlocker beats CAPTCHAs and blocks |\n| Clean, structured output | Easy to use data for marketing, lead gen, or syncing to Google Calendar |\n| Google Sheet as a source of truth | Everyone on your team can access and use the data |\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "197d2ec1-609e-4905-8671-e41ed6ceb92f",
"name": "Sticky Note4",
"type": "n8n-nodes-base.stickyNote",
"position": [
900,
540
],
"parameters": {
"color": 7,
"width": 380,
"height": 240,
"content": "## I’ll receive a tiny commission if you join Bright Data through this link—thanks for fueling more free content!\n\n### https://get.brightdata.com/1tndi4600b25"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "b59d50da-e665-47a2-975b-432971342df5",
"connections": {
"df0d0e23-1afe-4c74-b408-e5b3123c13a4": {
"main": [
[
{
"node": "c58d917f-d756-45ad-a38a-b52254250154",
"type": "main",
"index": 0
}
]
]
},
"5012cf3e-7fa6-4971-906b-760baeb51396": {
"main": [
[
{
"node": "8962483b-d095-4ade-bf9c-c3bfa5fe1831",
"type": "main",
"index": 0
}
]
]
},
"12d62c8b-047d-4890-85f7-c9a8097fcc2d": {
"main": [
[
{
"node": "df0d0e23-1afe-4c74-b408-e5b3123c13a4",
"type": "main",
"index": 0
}
]
]
},
"8962483b-d095-4ade-bf9c-c3bfa5fe1831": {
"main": [
[
{
"node": "12d62c8b-047d-4890-85f7-c9a8097fcc2d",
"type": "main",
"index": 0
}
]
]
}
}
}
How do I use this workflow?
Copy the JSON above, create a new workflow in your n8n instance, and choose "Import from JSON". Paste the configuration and adjust the credentials (Bright Data API key, Google Sheets account) as needed.
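Before wiring up credentials in n8n, it can help to verify the Bright Data call on its own. The sketch below mirrors the HTTP Request node's configuration (POST to `https://api.brightdata.com/request` with `zone`, `url`, `country`, and `format` body parameters, plus a Bearer token header). `buildBrightDataRequest` is a hypothetical helper, not part of the workflow; the zone name `n8n_unblocker` is taken from the JSON above.

```javascript
// Hypothetical helper that reproduces the workflow's HTTP Request node settings
// as a plain fetch() options object. Only the API key and target URL vary.
function buildBrightDataRequest(apiKey, targetUrl) {
  return {
    url: 'https://api.brightdata.com/request',
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      zone: 'n8n_unblocker', // Web Unlocker zone name from the workflow
      url: targetUrl,        // page to scrape
      country: 'us',         // exit-node country
      format: 'raw',         // return the raw HTML of the page
    }),
  };
}

// Usage sketch (Node 18+ for global fetch); the env var name is an assumption:
// const req = buildBrightDataRequest(process.env.BRIGHT_DATA_API_KEY,
//   'https://www.eventbrite.com/d/online/technology--events/');
// const html = await fetch(req.url, req).then((r) => r.text());
```

If the request succeeds, the response body is the raw HTML that the workflow's HTML node then parses with CSS selectors.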
Which scenarios is this workflow suited for?
Advanced - Artificial Intelligence
Is it paid?
The workflow itself is completely free. Note, however, that third-party services used in the workflow (such as the Bright Data API) may incur costs.
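The cleanup logic in the workflow's Code node (drop empty titles, deduplicate, filter out numbered "Trends" items like "1. Tickets") can also be run and tested outside n8n. This is a minimal standalone sketch of that logic with the n8n item wrapper removed:

```javascript
// Standalone version of the workflow's Code-node cleanup. Inputs are the
// parallel arrays produced by the HTML-extraction step.
function cleanEvents(titles, dates) {
  const seenTitles = new Set();
  const cleanedEvents = [];
  for (let i = 0; i < dates.length; i++) {
    const title = titles[i];
    if (!title || seenTitles.has(title)) continue; // skip empties and duplicates
    if (/^\d+\.\s/.test(title)) continue;          // skip "1. ..." trend items
    cleanedEvents.push({ Title: title, 'Date and Time': dates[i] });
    seenTitles.add(title);
  }
  return cleanedEvents;
}
```

Inside n8n the same function body runs in the Code node, which receives the arrays as `items[0].json.Title` and `items[0].json['Date and Time']` and returns the cleaned array directly.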
Yaron Been
@yaron-nofluff. Building AI Agents and Automations | Growth Marketer | Entrepreneur | Book Author & Podcast Host. If you need any help with automations, feel free to reach out via LinkedIn: https://www.linkedin.com/in/yaronbeen/ and check out my YouTube channel: https://www.youtube.com/@YaronBeen/videos