Webpage Reader

Name: Webpage Reader
Rating: 4.5 (10 reviews)
Author: Arthur Braghetto
Fortgeschritten
Dies ist ein Automatisierungsworkflow aus dem Bereich Dokumentenextraktion mit 15 Nodes. Hauptsächlich werden If, Set, HttpRequest, StopAndError, ExecuteWorkflowTrigger und andere Nodes verwendet. Der Workflow extrahiert saubere Webseiteninhalte für KI-Agenten und Workflows und verfügt über eine Ausweichlösung für Seiten mit Anti-Bot-Schutz.
Voraussetzungen
• Möglicherweise sind Anmeldedaten für die Ziel-API erforderlich
Verwendete Nodes (15)

ExecuteWorkflowTrigger
WebpageContentExtractor
Kategorie

Dokumentenextraktion
Workflow-Vorschau
Visualisierung der Node-Verbindungen, mit Zoom und Pan
Content Extractor
Try Antibot Evasion
Scrape.do
Server Error
Not 404
Not Found
Simple Scraper
Full Text
Fulltext Output
Summary Output
Is Binary
ContentType Error
Workflow Call
React Flow
Workflow exportieren
Kopieren Sie die folgende JSON-Konfiguration und importieren Sie sie in n8n
{
  "id": "9UyGvrk6EDY6Hm3W",
  "meta": {
    "instanceId": "7e84375f1a5a2398bff60c3e83bb370423dae55c261ed7c48ca02f15548655a7",
    "templateCredsSetupCompleted": true
  },
  "name": "WebPage-Reader",
  "tags": [],
  "nodes": [
    {
      "id": "f449a425-4ae9-462d-91bb-ff0b85a73202",
      "name": "Content Extractor",
      "type": "n8n-nodes-webpage-content-extractor.webpageContentExtractor",
      "position": [
        940,
        100
      ],
      "parameters": {
        "html": "={{ $json.data }}"
      },
      "typeVersion": 1
    },
    {
      "id": "e52eddc5-72a7-4bd8-8679-ecedccad447c",
      "name": "Try Antibot Evasion",
      "type": "n8n-nodes-base.if",
      "position": [
        280,
        180
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "1351d4e8-1c27-43c2-8335-aee7c097422a",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ECONNABORTED"
            },
            {
              "id": "28a4c2eb-0a9b-44ac-87d5-6571be2fb447",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ETIMEDOUT"
            },
            {
              "id": "1287e08b-a342-4651-8e56-1d1ff4677222",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ERR_CANCELED"
            },
            {
              "id": "45256daa-063f-4ed3-8ef0-5ec91cdc0974",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ERR_BAD_REQUEST"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "a90654b8-b83b-41ed-a665-9a0303a84de3",
      "name": "Scrape.do",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        500,
        180
      ],
      "parameters": {
        "url": "=https://api.scrape.do",
        "options": {
          "timeout": 120000
        },
        "sendQuery": true,
        "authentication": "genericCredentialType",
        "genericAuthType": "httpQueryAuth",
        "queryParameters": {
          "parameters": [
            {
              "name": "url",
              "value": "={{ $json.url }}"
            }
          ]
        }
      },
      "credentials": {
        "httpQueryAuth": {
          "id": "SMKkxhdbOewTAnqe",
          "name": "Scrape.do account"
        }
      },
      "retryOnFail": true,
      "typeVersion": 4.2,
      "waitBetweenTries": 5000
    },
    {
      "id": "62c1fb07-35e0-4942-b38d-b888b559e109",
      "name": "Server Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        500,
        380
      ],
      "parameters": {
        "errorMessage": "=Error requesting website ({{ $json.error.code }})"
      },
      "typeVersion": 1
    },
    {
      "id": "7e793496-3ba7-4a30-bb6c-d483c00671c6",
      "name": "Not 404",
      "type": "n8n-nodes-base.if",
      "position": [
        60,
        180
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "81558598-6188-4712-962c-3f80fcba1297",
              "operator": {
                "type": "number",
                "operation": "notEquals"
              },
              "leftValue": "={{ $json.error.status }}",
              "rightValue": 404
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "9ae25973-ffa0-4b14-943b-d8a9fa0ee3b0",
      "name": "Not Found",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        280,
        380
      ],
      "parameters": {
        "errorMessage": "=Error requesting website (404)"
      },
      "typeVersion": 1
    },
    {
      "id": "8af2bbee-ebd0-49e2-aa4a-bc58e1ccaf31",
      "name": "Simple Scraper",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        -160,
        120
      ],
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {
          "timeout": 10000,
          "redirect": {
            "redirect": {}
          },
          "allowUnauthorizedCerts": true
        },
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "User-Agent",
              "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
            }
          ]
        }
      },
      "retryOnFail": true,
      "typeVersion": 4.2,
      "waitBetweenTries": 5000
    },
    {
      "id": "f0f8106a-9a8c-492e-8082-fc82a3852765",
      "name": "Full Text",
      "type": "n8n-nodes-base.if",
      "position": [
        1160,
        100
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "b32569d1-ba84-401f-9dc9-99b2c804cba2",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $('Workflow Call').item.json.fulltext }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "325b73df-6fe0-4c22-985e-0916a09a8865",
      "name": "Fulltext Output",
      "type": "n8n-nodes-base.set",
      "position": [
        1380,
        0
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
              "name": "title",
              "type": "string",
              "value": "={{ ( $json.title || '' ).replace(/\\p{Extended_Pictographic}/gu, '') }}"
            },
            {
              "id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
              "name": "text",
              "type": "string",
              "value": "={{\n( $json.textContent || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "32e88e32-0068-47de-8f72-aee167f15ca2",
      "name": "Summary Output",
      "type": "n8n-nodes-base.set",
      "position": [
        1380,
        200
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
              "name": "title",
              "type": "string",
              "value": "={{ ( $json.title || '' ).replace(/\\p{Extended_Pictographic}/gu, '') }}"
            },
            {
              "id": "28476e01-485e-4373-a6c3-b3703d4ba1e4",
              "name": "url",
              "type": "string",
              "value": "={{ $('Workflow Call').item.json.url }}"
            },
            {
              "id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
              "name": "content",
              "type": "string",
              "value": "={{\n( $json.excerpt || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "96438bb1-1918-4ae6-9a40-0624968ca7b3",
      "name": "Is Binary",
      "type": "n8n-nodes-base.if",
      "position": [
        720,
        0
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "8255ef66-f18d-4f38-a283-592cbd617109",
              "operator": {
                "type": "object",
                "operation": "exists",
                "singleValue": true
              },
              "leftValue": "={{ $binary.data }}",
              "rightValue": ".pdf"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "5bb977be-3e50-4240-a05a-4df23e8f7470",
      "name": "ContentType Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        940,
        -80
      ],
      "parameters": {
        "errorMessage": "=Unsupported content-type"
      },
      "typeVersion": 1
    },
    {
      "id": "7b68057e-9189-4291-a40f-e9941443a65a",
      "name": "Workflow Call",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -380,
        120
      ],
      "parameters": {
        "workflowInputs": {
          "values": [
            {
              "name": "url"
            },
            {
              "name": "fulltext",
              "type": "boolean"
            }
          ]
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "bbe6563d-1131-4f7e-9a19-0dff16d1adb5",
      "name": "Haftnotiz",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1200,
        -100
      ],
      "parameters": {
        "width": 760,
        "height": 640,
        "content": "# WebPage Reader for AI Agents & Workflows\n\nThis sub-workflow enables reliable and clean scraping of any public webpage by simply passing a **url** parameter. It is designed to be embedded into other workflows or used as a tool for AI agents.\n\n🧩 This template requires the [n8n-nodes-webpage-content-extractor](https://www.npmjs.com/package/n8n-nodes-webpage-content-extractor) community node, so it only works in self-hosted n8n environments.\n\n💡 If the site is protected by anti-bot systems (like Cloudflare), it will automatically fallback to [Scrape.do](https://scrape.do/), a scraping API with a generous free plan. You only need to provide your API Token.\n\n## Input Parameters:\n- **url** (string): the webpage URL to scrape\n- **fulltext** (boolean): set true for full page content, false for summarized output\n\n## Output Modes:\n- **fulltext: true** — returns *{ title, text }* with full page content\n- **fulltext: false** — returns *{ title, url, content }* with a short excerpt\n\n## Usage:\nIn your workflows you can invoke this workflow using the **Execute Sub-workflow** or **Call n8n Workflow Tool** nodes.\nRemember to pass the url as a parameter and configure the fulltext option when invoking it.\n\n\n*(See the Setup note for instructions on how to set up this workflow.)*"
      },
      "typeVersion": 1
    },
    {
      "id": "cf3a4e8f-d63f-482d-81eb-746ed7f66c85",
      "name": "Haftnotiz1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1600,
        -100
      ],
      "parameters": {
        "width": 680,
        "height": 660,
        "content": "# Setup\nTo set up this workflow you will need to install the [n8n-nodes-webpage-content-extractor](https://www.npmjs.com/package/n8n-nodes-webpage-content-extractor) community node and API Token from your [Scrape.do](https://scrape.do/) account. Then you will just need to configure the `Scrape.do` node here with your credentials.\n\n## Community Node Installation\nBefore importing this workflow you first need to install this node on your n8n.\n- Go to your n8n's settings page. By clicking on the three dots next to your username in the bottom left corner of the screen.\n- In the left side menu click on Community Nodes.\n- Now click on the Install button.\n- In the npm Package Name field enter **n8n-nodes-webpage-content-extractor**, check the box that says *I understand the risks of installing unverified code from a public source*, and then click the Install button.\n\n## `Scrape.do` Node Setup\nBefore configuring the node, create your account on [Scrape.do](https://scrape.do/) and save your API Token\n- Open the Node `Scrape.do` configuration window by double-clicking on it.\n- In the **Authentication** field, select the **Generic Credential Type** option.\n- In the **Generic Credential Type** field below select the **Query Auth** option\n- In the **Query Auth** field below select **Create new credential**. To save your token to a new n8n credential.\n- In the window that opens, in the **Name** field, enter the word **token** only. In the **Value** field, paste your **API Token**. Then click the Save button.\n\n\n\n**Your workflow is ready! You can now use it in your workflows and in your AI Agents!**"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "callerPolicy": "workflowsFromSameOwner",
    "errorWorkflow": "4HcJPFvOCSd7pZeG",
    "executionOrder": "v1",
    "saveDataSuccessExecution": "none"
  },
  "versionId": "8cc14be0-b5b6-41c3-8838-e92591538965",
  "connections": {
    "7e793496-3ba7-4a30-bb6c-d483c00671c6": {
      "main": [
        [
          {
            "node": "e52eddc5-72a7-4bd8-8679-ecedccad447c",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "9ae25973-ffa0-4b14-943b-d8a9fa0ee3b0",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "f0f8106a-9a8c-492e-8082-fc82a3852765": {
      "main": [
        [
          {
            "node": "325b73df-6fe0-4c22-985e-0916a09a8865",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "32e88e32-0068-47de-8f72-aee167f15ca2",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "96438bb1-1918-4ae6-9a40-0624968ca7b3": {
      "main": [
        [
          {
            "node": "5bb977be-3e50-4240-a05a-4df23e8f7470",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "f449a425-4ae9-462d-91bb-ff0b85a73202",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "a90654b8-b83b-41ed-a665-9a0303a84de3": {
      "main": [
        [
          {
            "node": "96438bb1-1918-4ae6-9a40-0624968ca7b3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7b68057e-9189-4291-a40f-e9941443a65a": {
      "main": [
        [
          {
            "node": "8af2bbee-ebd0-49e2-aa4a-bc58e1ccaf31",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "8af2bbee-ebd0-49e2-aa4a-bc58e1ccaf31": {
      "main": [
        [
          {
            "node": "96438bb1-1918-4ae6-9a40-0624968ca7b3",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "7e793496-3ba7-4a30-bb6c-d483c00671c6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "f449a425-4ae9-462d-91bb-ff0b85a73202": {
      "main": [
        [
          {
            "node": "f0f8106a-9a8c-492e-8082-fc82a3852765",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "e52eddc5-72a7-4bd8-8679-ecedccad447c": {
      "main": [
        [
          {
            "node": "a90654b8-b83b-41ed-a665-9a0303a84de3",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "62c1fb07-35e0-4942-b38d-b888b559e109",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
Häufig gestellte Fragen
Wie verwende ich diesen Workflow?

Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.
Für welche Szenarien ist dieser Workflow geeignet?

Fortgeschritten - Dokumentenextraktion
Ist es kostenpflichtig?

Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste (wie die Scrape.do-API), die im Workflow verwendet werden, möglicherweise kostenpflichtig sind.