Multi-Website-Crawler basierend auf Jina.ai

Experte

Dies ist ein Automatisierungsworkflow aus dem Bereich KI mit 16 Nodes. Er verwendet hauptsächlich die Nodes Set, Xml, Code, Wait und Limit sowie weitere Nodes und kombiniert sie mit KI-Technologie für intelligente Automatisierung. 💡🌐 Mehrseitiger Website-Scraper mit Jina.ai

Voraussetzungen
  • Google Drive API-Anmeldedaten
  • Möglicherweise sind Ziel-API-Anmeldedaten erforderlich
Workflow-Vorschau
Visualisierung der Node-Verbindungen, mit Zoom und Pan
Workflow exportieren
Kopieren Sie die folgende JSON-Konfiguration und importieren Sie sie in n8n
{
  "id": "xEij0kj2I1DHbL3I",
  "meta": {
    "instanceId": "31e69f7f4a77bf465b805824e303232f0227212ae922d12133a0f96ffeab4fef",
    "templateCredsSetupCompleted": true
  },
  "name": "💡🌐 Essential Multipage Website Scraper with Jina.ai",
  "tags": [],
  "nodes": [
    {
      "id": "3a503859-ef0a-492d-81c6-37e4f0c4c25e",
      "name": "Notizzettel",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -840,
        0
      ],
      "parameters": {
        "color": 3,
        "width": 340,
        "height": 320,
        "content": "## Jina.ai Web Scraper\n### No API Key Required\n"
      },
      "typeVersion": 1
    },
    {
      "id": "c5217a1a-f074-409b-8340-72afdc5fc8b5",
      "name": "Bei Klick auf 'Workflow testen'",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1500,
        -300
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "72af3b00-2632-4877-a0b6-7477e2f468f7",
      "name": "Über Elemente iterieren",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -1080,
        20
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "11f0fa02-51f8-41cc-b789-5c452b6899aa",
      "name": "Warten",
      "type": "n8n-nodes-base.wait",
      "position": [
        80,
        220
      ],
      "webhookId": "081ce124-0cbf-4a21-a1e7-2c465f460448",
      "parameters": {},
      "typeVersion": 1.1
    },
    {
      "id": "cf3b5887-8ff2-46e0-ab33-384ab0987cbb",
      "name": "Limit",
      "type": "n8n-nodes-base.limit",
      "position": [
        80,
        -300
      ],
      "parameters": {
        "maxItems": 20
      },
      "typeVersion": 1
    },
    {
      "id": "c4f04d82-aa33-46cf-a8e2-0b4e717e754a",
      "name": "Liste der Website-URLs abrufen",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -780,
        -300
      ],
      "parameters": {
        "url": "={{ $json.sitemap_url }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "7f507c38-1e9e-4c46-8dea-bd6daf65dc55",
      "name": "Konvertieren zu JSON",
      "type": "n8n-nodes-base.xml",
      "position": [
        -560,
        -300
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "e21b55c2-8b0d-4c7c-ba91-a2d563a4c966",
      "name": "Liste der Website-URLs erstellen",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -340,
        -300
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "urlset.url"
      },
      "typeVersion": 1
    },
    {
      "id": "61555239-8a16-424e-8a60-700f6ebaa270",
      "name": "Nach Themen oder Seiten filtern",
      "type": "n8n-nodes-base.filter",
      "position": [
        -120,
        -300
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "d66c304d-879a-4dc4-908f-ab0665093672",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.loc }}",
              "rightValue": "=https://ai.pydantic.dev/"
            },
            {
              "id": "3c930950-bee4-442b-82e6-4437fd39a933",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.loc.toLowerCase() }}",
              "rightValue": "agent"
            },
            {
              "id": "aaeaf34e-ad5a-4673-b3bd-8bddf3500988",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.loc.toLowerCase() }}",
              "rightValue": "tool"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "dd25fb57-64a3-4c47-be04-6eb66d16520a",
      "name": "Website-URL setzen",
      "type": "n8n-nodes-base.set",
      "position": [
        -1080,
        -300
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "1601dc3e-8024-4e19-b592-93a4e4f77641",
              "name": "sitemap_url",
              "type": "string",
              "value": "https://ai.pydantic.dev/sitemap.xml"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "14ac1c87-29fe-44c8-9c1e-f247a292dde5",
      "name": "Jina.ai Web Scraper",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -720,
        120
      ],
      "parameters": {
        "url": "=https://r.jina.ai/{{ $json.loc }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "be253ec2-f088-4895-8ef2-61a3720cf68b",
      "name": "Webseiteninhalte in Google Drive speichern",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -120,
        120
      ],
      "parameters": {
        "name": "={{ $('Über Elemente iterieren').item.json.loc }} - {{ $json.title }}",
        "content": "={{ $json.markdown }}",
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive"
        },
        "options": {},
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "root",
          "cachedResultName": "/ (Root folder)"
        },
        "operation": "createFromText"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "UhdXGYLTAJbsa0xX",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "95d808c7-a3ca-4f59-a385-cc77bdff322e",
      "name": "Titel & Markdown-Inhalt extrahieren",
      "type": "n8n-nodes-base.code",
      "position": [
        -380,
        120
      ],
      "parameters": {
        "jsCode": "// Read the raw Jina.ai reader response from the incoming item\nconst raw = $input.first().json.data;\n// Guard against a missing or non-text response so .match() cannot throw\nconst data = typeof raw === 'string' ? raw : '';\n\n// Regular expression to capture the title line\nconst titleRegex = /^Title:\\s*(.+)$/m;\n// Regular expression to capture everything after \"Markdown Content:\" (LF or CRLF line ending)\nconst markdownRegex = /Markdown Content:\\r?\\n([\\s\\S]+)/;\n\n// Extract the title using the first capture group\nconst titleMatch = data.match(titleRegex);\nconst title = titleMatch ? titleMatch[1].trim() : '';\n\n// Extract the markdown content using the first capture group\nconst markdownMatch = data.match(markdownRegex);\nconst markdown = markdownMatch ? markdownMatch[1].trim() : '';\n\n// Return a single item in n8n's canonical [{ json }] shape\nreturn [{ json: { title, markdown } }];"
      },
      "typeVersion": 2
    },
    {
      "id": "2fb86c81-c144-4450-908c-559855deadef",
      "name": "Notizzettel1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1240,
        -580
      ],
      "parameters": {
        "color": 7,
        "width": 1540,
        "height": 1080,
        "content": "# 💡🌐 Essential Multipage Website Scraper with Jina.ai\n## Scrape entire websites with this workflow\n**Use responsibly and follow local rules and regulations**"
      },
      "typeVersion": 1
    },
    {
      "id": "b470b294-95d0-4e51-a9cc-2fe17316a771",
      "name": "Notizzettel2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1580,
        -400
      ],
      "parameters": {
        "color": 4,
        "width": 280,
        "height": 300,
        "content": "## 👍Try Me!"
      },
      "typeVersion": 1
    },
    {
      "id": "fafd0623-a423-4e73-9609-cee8e81f5c13",
      "name": "Notizzettel3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1180,
        -400
      ],
      "parameters": {
        "width": 300,
        "height": 300,
        "content": "## 👇Add Website Sitemap URL"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "2e815787-d83b-4ab7-a959-2f33006a37a5",
  "connections": {
    "Warten": {
      "main": [
        [
          {
            "node": "Über Elemente iterieren",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Limit": {
      "main": [
        [
          {
            "node": "Über Elemente iterieren",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Konvertieren zu JSON": {
      "main": [
        [
          {
            "node": "Liste der Website-URLs erstellen",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Über Elemente iterieren": {
      "main": [
        [],
        [
          {
            "node": "Jina.ai Web Scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Website-URL setzen": {
      "main": [
        [
          {
            "node": "Liste der Website-URLs abrufen",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Jina.ai Web Scraper": {
      "main": [
        [
          {
            "node": "Titel & Markdown-Inhalt extrahieren",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Liste der Website-URLs abrufen": {
      "main": [
        [
          {
            "node": "Konvertieren zu JSON",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Nach Themen oder Seiten filtern": {
      "main": [
        [
          {
            "node": "Limit",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Liste der Website-URLs erstellen": {
      "main": [
        [
          {
            "node": "Nach Themen oder Seiten filtern",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Titel & Markdown-Inhalt extrahieren": {
      "main": [
        [
          {
            "node": "Webseiteninhalte in Google Drive speichern",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Bei Klick auf 'Workflow testen'": {
      "main": [
        [
          {
            "node": "Website-URL setzen",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Webseiteninhalte in Google Drive speichern": {
      "main": [
        [
          {
            "node": "Warten",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
Häufig gestellte Fragen

Wie verwende ich diesen Workflow?

Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.

Für welche Szenarien ist dieser Workflow geeignet?

Experte - Künstliche Intelligenz

Ist es kostenpflichtig?

Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste, die im Workflow verwendet werden (z. B. Jina.ai oder die Google Drive API), möglicherweise kostenpflichtig sind.

Workflow-Informationen
Schwierigkeitsgrad
Experte
Anzahl der Nodes: 16
Kategorie: 1
Node-Typen: 12
Schwierigkeitsbeschreibung

Für fortgeschrittene Benutzer, komplexe Workflows mit 16+ Nodes

Autor
Joseph LePage

Joseph LePage

@joe

As an AI Automation consultant based in Canada, I partner with forward-thinking organizations to implement AI solutions that streamline operations and drive growth.

Externe Links
Auf n8n.io ansehen

Diesen Workflow teilen

Kategorien

Kategorien: 34