Webpage Reader
Fortgeschritten
Dies ist ein Automatisierungsworkflow aus dem Bereich Document Extraction mit 15 Nodes. Hauptsächlich werden If, Set, HttpRequest, StopAndError, ExecuteWorkflowTrigger und andere Nodes verwendet. Er extrahiert saubere Webseiteninhalte für KI-Agenten und Workflows und bietet eine Ausweichlösung für Seiten mit Anti-Bot-Schutz.
Voraussetzungen
- Möglicherweise sind Anmeldedaten für die Ziel-API erforderlich
Verwendete Nodes (15)
Kategorie
Workflow-Vorschau
Visualisierung der Node-Verbindungen, mit Zoom und Pan
Workflow exportieren
Kopieren Sie die folgende JSON-Konfiguration und importieren Sie sie in n8n
{
"id": "9UyGvrk6EDY6Hm3W",
"meta": {
"instanceId": "7e84375f1a5a2398bff60c3e83bb370423dae55c261ed7c48ca02f15548655a7",
"templateCredsSetupCompleted": true
},
"name": "WebPage-Reader",
"tags": [],
"nodes": [
{
"id": "f449a425-4ae9-462d-91bb-ff0b85a73202",
"name": "Content Extractor",
"type": "n8n-nodes-webpage-content-extractor.webpageContentExtractor",
"position": [
940,
100
],
"parameters": {
"html": "={{ $json.data }}"
},
"typeVersion": 1
},
{
"id": "e52eddc5-72a7-4bd8-8679-ecedccad447c",
"name": "Try Antibot Evasion",
"type": "n8n-nodes-base.if",
"position": [
280,
180
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "or",
"conditions": [
{
"id": "1351d4e8-1c27-43c2-8335-aee7c097422a",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ECONNABORTED"
},
{
"id": "28a4c2eb-0a9b-44ac-87d5-6571be2fb447",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ETIMEDOUT"
},
{
"id": "1287e08b-a342-4651-8e56-1d1ff4677222",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ERR_CANCELED"
},
{
"id": "45256daa-063f-4ed3-8ef0-5ec91cdc0974",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ERR_BAD_REQUEST"
}
]
}
},
"typeVersion": 2.2
},
{
  "id": "a90654b8-b83b-41ed-a665-9a0303a84de3",
  "name": "Scrape.do",
  "type": "n8n-nodes-base.httpRequest",
  "notes": "Fallback fetch through the Scrape.do API. Uses HTTPS because the API token is sent as a query parameter.",
  "position": [
    500,
    180
  ],
  "parameters": {
    "url": "=https://api.scrape.do",
    "options": {
      "timeout": 120000
    },
    "sendQuery": true,
    "authentication": "genericCredentialType",
    "genericAuthType": "httpQueryAuth",
    "queryParameters": {
      "parameters": [
        {
          "name": "url",
          "value": "={{ $json.url }}"
        }
      ]
    }
  },
  "credentials": {
    "httpQueryAuth": {
      "id": "SMKkxhdbOewTAnqe",
      "name": "Scrape.do account"
    }
  },
  "retryOnFail": true,
  "typeVersion": 4.2,
  "waitBetweenTries": 5000
},
{
"id": "62c1fb07-35e0-4942-b38d-b888b559e109",
"name": "Server Error",
"type": "n8n-nodes-base.stopAndError",
"position": [
500,
380
],
"parameters": {
"errorMessage": "=Error requesting website ({{ $json.error.code }})"
},
"typeVersion": 1
},
{
"id": "7e793496-3ba7-4a30-bb6c-d483c00671c6",
"name": "Not 404",
"type": "n8n-nodes-base.if",
"position": [
60,
180
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "81558598-6188-4712-962c-3f80fcba1297",
"operator": {
"type": "number",
"operation": "notEquals"
},
"leftValue": "={{ $json.error.status }}",
"rightValue": 404
}
]
}
},
"typeVersion": 2.2
},
{
"id": "9ae25973-ffa0-4b14-943b-d8a9fa0ee3b0",
"name": "Not Found",
"type": "n8n-nodes-base.stopAndError",
"position": [
280,
380
],
"parameters": {
"errorMessage": "=Error requesting website (404)"
},
"typeVersion": 1
},
{
"id": "8af2bbee-ebd0-49e2-aa4a-bc58e1ccaf31",
"name": "Simple Scraper",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-160,
120
],
"parameters": {
"url": "={{ $json.url }}",
"options": {
"timeout": 10000,
"redirect": {
"redirect": {}
},
"allowUnauthorizedCerts": true
},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2,
"waitBetweenTries": 5000
},
{
"id": "f0f8106a-9a8c-492e-8082-fc82a3852765",
"name": "Full Text",
"type": "n8n-nodes-base.if",
"position": [
1160,
100
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "b32569d1-ba84-401f-9dc9-99b2c804cba2",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $('Workflow Call').item.json.fulltext }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
  "id": "325b73df-6fe0-4c22-985e-0916a09a8865",
  "name": "Fulltext Output",
  "type": "n8n-nodes-base.set",
  "notes": "Builds { title, text } for fulltext mode. Both fields fall back to an empty string when the extractor returns nothing, then emoji and redundant whitespace are stripped.",
  "position": [
    1380,
    0
  ],
  "parameters": {
    "options": {},
    "assignments": {
      "assignments": [
        {
          "id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
          "name": "title",
          "type": "string",
          "value": "={{ ( $json.title || '' ).replace(/\\p{Extended_Pictographic}/gu, '') }}"
        },
        {
          "id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
          "name": "text",
          "type": "string",
          "value": "={{\n( $json.textContent || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
        }
      ]
    }
  },
  "typeVersion": 3.4
},
{
  "id": "32e88e32-0068-47de-8f72-aee167f15ca2",
  "name": "Summary Output",
  "type": "n8n-nodes-base.set",
  "notes": "Builds { title, url, content } for summary mode. Title and excerpt fall back to an empty string when the extractor returns nothing, then emoji and redundant whitespace are stripped.",
  "position": [
    1380,
    200
  ],
  "parameters": {
    "options": {},
    "assignments": {
      "assignments": [
        {
          "id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
          "name": "title",
          "type": "string",
          "value": "={{ ( $json.title || '' ).replace(/\\p{Extended_Pictographic}/gu, '') }}"
        },
        {
          "id": "28476e01-485e-4373-a6c3-b3703d4ba1e4",
          "name": "url",
          "type": "string",
          "value": "={{ $('Workflow Call').item.json.url }}"
        },
        {
          "id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
          "name": "content",
          "type": "string",
          "value": "={{\n( $json.excerpt || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
        }
      ]
    }
  },
  "typeVersion": 3.4
},
{
"id": "96438bb1-1918-4ae6-9a40-0624968ca7b3",
"name": "Is Binary",
"type": "n8n-nodes-base.if",
"position": [
720,
0
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "8255ef66-f18d-4f38-a283-592cbd617109",
"operator": {
"type": "object",
"operation": "exists",
"singleValue": true
},
"leftValue": "={{ $binary.data }}",
"rightValue": ".pdf"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "5bb977be-3e50-4240-a05a-4df23e8f7470",
"name": "ContentType Error",
"type": "n8n-nodes-base.stopAndError",
"position": [
940,
-80
],
"parameters": {
"errorMessage": "=Unsupported content-type"
},
"typeVersion": 1
},
{
"id": "7b68057e-9189-4291-a40f-e9941443a65a",
"name": "Workflow Call",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"position": [
-380,
120
],
"parameters": {
"workflowInputs": {
"values": [
{
"name": "url"
},
{
"name": "fulltext",
"type": "boolean"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "bbe6563d-1131-4f7e-9a19-0dff16d1adb5",
"name": "Haftnotiz",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1200,
-100
],
"parameters": {
"width": 760,
"height": 640,
"content": "# WebPage Reader for AI Agents & Workflows\n\nThis sub-workflow enables reliable and clean scraping of any public webpage by simply passing a **url** parameter. It is designed to be embedded into other workflows or used as a tool for AI agents.\n\n🧩 This template requires the [n8n-nodes-webpage-content-extractor](https://www.npmjs.com/package/n8n-nodes-webpage-content-extractor) community node, so it only works in self-hosted n8n environments.\n\n💡 If the site is protected by anti-bot systems (like Cloudflare), it will automatically fall back to [Scrape.do](https://scrape.do/), a scraping API with a generous free plan. You only need to provide your API Token.\n\n## Input Parameters:\n- **url** (string): the webpage URL to scrape\n- **fulltext** (boolean): set true for full page content, false for summarized output\n\n## Output Modes:\n- **fulltext: true** — returns *{ title, text }* with full page content\n- **fulltext: false** — returns *{ title, url, content }* with a short excerpt\n\n## Usage:\nIn your workflows you can invoke this workflow using the **Execute Sub-workflow** or **Call n8n Workflow Tool** nodes.\nRemember to pass the url as a parameter and configure the fulltext option when invoking it.\n\n\n*(See the Setup note for instructions on how to set up this workflow.)*"
},
"typeVersion": 1
},
{
"id": "cf3a4e8f-d63f-482d-81eb-746ed7f66c85",
"name": "Haftnotiz1",
"type": "n8n-nodes-base.stickyNote",
"position": [
1600,
-100
],
"parameters": {
"width": 680,
"height": 660,
"content": "# Setup\nTo set up this workflow you will need to install the [n8n-nodes-webpage-content-extractor](https://www.npmjs.com/package/n8n-nodes-webpage-content-extractor) community node and an API Token from your [Scrape.do](https://scrape.do/) account. Then you will just need to configure the `Scrape.do` node here with your credentials.\n\n## Community Node Installation\nBefore importing this workflow you first need to install this node on your n8n.\n- Go to your n8n's settings page by clicking on the three dots next to your username in the bottom left corner of the screen.\n- In the left side menu click on Community Nodes.\n- Now click on the Install button.\n- In the npm Package Name field enter **n8n-nodes-webpage-content-extractor**, check the box that says *I understand the risks of installing unverified code from a public source*, and then click the Install button.\n\n## `Scrape.do` Node Setup\nBefore configuring the node, create your account on [Scrape.do](https://scrape.do/) and save your API Token.\n- Open the Node `Scrape.do` configuration window by double-clicking on it.\n- In the **Authentication** field, select the **Generic Credential Type** option.\n- In the **Generic Credential Type** field below select the **Query Auth** option.\n- In the **Query Auth** field below select **Create new credential** to save your token to a new n8n credential.\n- In the window that opens, in the **Name** field, enter the word **token** only. In the **Value** field, paste your **API Token**. Then click the Save button.\n\n\n\n**Your workflow is ready! You can now use it in your workflows and in your AI Agents!**"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"callerPolicy": "workflowsFromSameOwner",
"errorWorkflow": "4HcJPFvOCSd7pZeG",
"executionOrder": "v1",
"saveDataSuccessExecution": "none"
},
"versionId": "8cc14be0-b5b6-41c3-8838-e92591538965",
"connections": {
  "Not 404": {
    "main": [
      [
        {
          "node": "Try Antibot Evasion",
          "type": "main",
          "index": 0
        }
      ],
      [
        {
          "node": "Not Found",
          "type": "main",
          "index": 0
        }
      ]
    ]
  },
  "Full Text": {
    "main": [
      [
        {
          "node": "Fulltext Output",
          "type": "main",
          "index": 0
        }
      ],
      [
        {
          "node": "Summary Output",
          "type": "main",
          "index": 0
        }
      ]
    ]
  },
  "Is Binary": {
    "main": [
      [
        {
          "node": "ContentType Error",
          "type": "main",
          "index": 0
        }
      ],
      [
        {
          "node": "Content Extractor",
          "type": "main",
          "index": 0
        }
      ]
    ]
  },
  "Scrape.do": {
    "main": [
      [
        {
          "node": "Is Binary",
          "type": "main",
          "index": 0
        }
      ]
    ]
  },
  "Workflow Call": {
    "main": [
      [
        {
          "node": "Simple Scraper",
          "type": "main",
          "index": 0
        }
      ]
    ]
  },
  "Simple Scraper": {
    "main": [
      [
        {
          "node": "Is Binary",
          "type": "main",
          "index": 0
        }
      ],
      [
        {
          "node": "Not 404",
          "type": "main",
          "index": 0
        }
      ]
    ]
  },
  "Content Extractor": {
    "main": [
      [
        {
          "node": "Full Text",
          "type": "main",
          "index": 0
        }
      ]
    ]
  },
  "Try Antibot Evasion": {
    "main": [
      [
        {
          "node": "Scrape.do",
          "type": "main",
          "index": 0
        }
      ],
      [
        {
          "node": "Server Error",
          "type": "main",
          "index": 0
        }
      ]
    ]
  }
}
}
Häufig gestellte Fragen
Wie verwende ich diesen Workflow?
Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.
Für welche Szenarien ist dieser Workflow geeignet?
Fortgeschritten - Dokumentenextraktion
Ist es kostenpflichtig?
Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass im Workflow verwendete Drittanbieterdienste (wie die Scrape.do-API) möglicherweise kostenpflichtig sind.
Verwandte Workflows
PDF zu Auftrag
KI-gesteuerte Automatisierung der Umwandlung von PDF-Bestellaufträgen in Adobe Commerce-Aufträge
If
Set
Code
+
If
Set
Code
96 Nodes · JKingma
Dokumentenextraktion
Dokumentenagent-Vorlage
Benutzerdefinierte PDF-Dokumente aus Vorlagen mit Gemini und Google Drive erstellen
If
Set
Code
+
If
Set
Code
36 Nodes · Ozgur Karateke
Dokumentenextraktion
AI Email Triage & Alert System with GPT-4 and Telegram Notifications
If
Set
Gmail
+
If
Set
Gmail
104 Nodes · Peter Joslyn
Support
Erstellen von KI-Videos mit OpenAI-Skripten, Leonardo-Bildern und HeyGen-Avataren
Erstelle KI-Videos mit OpenAI-Skripten, Leonardo-Bildern und HeyGen-Avataren
If
Set
Code
+
If
Set
Code
68 Nodes · Adam Crafts
Design
Erstellen von KI-Videos mit Skripten, Bildern und HeyGen-Avataren (🔥Zeitlich begrenztes Angebot)
Erstelle KI-Videos mit Skripten, Bildern und HeyGen-Avataren (🔥Zeitlich begrenztes Angebot)
If
Set
Code
+
If
Set
Code
68 Nodes · Adam Crafts
Design
1. Playlist-Details-Einstellungen für Roboter-Kopie
Erstelle KI-generierte YouTube-Musik-Playlists mit Suno, GPT-4, Runway und Creatomate
If
Set
Code
+
If
Set
Code
203 Nodes · Joseph
Content-Erstellung
Workflow-Informationen
Schwierigkeitsgrad
Fortgeschritten
Anzahl der Nodes: 15
Kategorie: 1
Node-Typen: 7
Autor
Externe Links
Auf n8n.io ansehen →
Diesen Workflow teilen