Extraktion-von-Links-und-URLs-aus-PDF-Dokumenten-mit-PDF.co
Dies ist ein Automatisierungsworkflow aus dem Bereich Document Extraction mit 10 Nodes. Hauptsächlich werden die Nodes Code, PDFco Api, FormTrigger und HttpRequest sowie weitere Nodes verwendet. Der Workflow extrahiert Links und URLs aus PDF-Dokumenten mit PDF.co.
- Möglicherweise sind Anmeldedaten für die Ziel-API erforderlich
Verwendete Nodes (10)
Kategorie
{
"meta": {
"instanceId": "8396a2c42d7edc37401c19b3909f25edcb0e4a6dc3914c824fddd73a329a575c",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "f6e71b74-1ecb-43e8-baa2-bf05536d01b7",
"name": "PDF laden",
"type": "n8n-nodes-base.formTrigger",
"position": [
-2224,
-384
],
"webhookId": "a919be0e-e4c8-4b9c-b8e9-b1ffdd6ef9fb",
"parameters": {
"options": {},
"formTitle": "pdf",
"formFields": {
"values": [
{
"fieldType": "file",
"fieldLabel": "data",
"multipleFiles": false,
"acceptFileTypes": ".pdf"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "f24dd98b-b3c4-47f1-8345-10097e53803d",
"name": "Hochladen",
"type": "n8n-nodes-pdfco.PDFco Api",
"position": [
-2016,
-384
],
"parameters": {
"name": "test",
"operation": "Upload File to PDF.co",
"binaryData": true
},
"credentials": {
"pdfcoApi": {
"id": "RgIt0qlGxtcDwXW7",
"name": "PDF.co account"
}
},
"typeVersion": 1
},
{
"id": "b354cde6-5354-4052-9a6c-d66c328a946f",
"name": "PDF zu HTML",
"type": "n8n-nodes-pdfco.PDFco Api",
"position": [
-1776,
-384
],
"parameters": {
"url": "={{ $json.url }}",
"operation": "Convert from PDF",
"advancedOptions": {}
},
"credentials": {
"pdfcoApi": {
"id": "RgIt0qlGxtcDwXW7",
"name": "PDF.co account"
}
},
"typeVersion": 1
},
{
"id": "e15b5c0d-5a46-4faa-828f-25e56cfce322",
"name": "HTML abrufen",
"type": "n8n-nodes-base.httpRequest",
"position": [
-1568,
-384
],
"parameters": {
"url": "={{ $json.url }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "73506c94-6265-4d89-b386-e908285d14e0",
"name": "Notiz",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2288,
-448
],
"parameters": {
"width": 208,
"height": 240,
"content": "## Load PDF\n"
},
"typeVersion": 1
},
{
"id": "6d23ab8a-5bae-4317-b73e-fb1b2ba8ff16",
"name": "Notiz1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2080,
-448
],
"parameters": {
"color": 2,
"height": 240,
"content": "## Upload to PDF.CO \n"
},
"typeVersion": 1
},
{
"id": "72be2279-3028-4c24-8973-00879cff375a",
"name": "Notiz2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1840,
-448
],
"parameters": {
"color": 4,
"width": 224,
"height": 240,
"content": "## PDF to HTML"
},
"typeVersion": 1
},
{
"id": "cebf4aeb-549c-4c9e-84eb-41d880834fb5",
"name": "Notiz3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1616,
-448
],
"parameters": {
"width": 192,
"height": 240,
"content": "## Get HTML"
},
"typeVersion": 1
},
{
"id": "8f6d9763-dece-45f6-a78b-1b5f6891f2fa",
"name": "Code1",
"type": "n8n-nodes-base.code",
"position": [
-1360,
-384
],
"parameters": {
"jsCode": "// Recorrer todos los items que entran al nodo\nconst resultados = [];\n\nfor (const item of $input.all()) {\n const texto = item.json.data || '';\n // Regex para URLs (http, https, www)\n const regexUrl = /(https?:\\/\\/[^\\s]+)|(www\\.[^\\s]+)/gi;\n \n // Extraer URLs, si no hay ninguna, el resultado es []\n const urls = texto.match(regexUrl) || [];\n \n // Por cada URL encontrada, crear un nuevo item con la URL\n for (const url of urls) {\n resultados.push({ json: { url } });\n }\n}\n\n// Devolver un array de objetos con las URLs extraídas\nreturn resultados;\n"
},
"typeVersion": 2
},
{
"id": "0c49f98f-0b3c-4c47-ad34-b60b02c5f3a5",
"name": "Notiz4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1424,
-448
],
"parameters": {
"color": 5,
"width": 208,
"height": 240,
"content": "## Get URLs \n"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"f24dd98b-b3c4-47f1-8345-10097e53803d": {
"main": [
[
{
"node": "b354cde6-5354-4052-9a6c-d66c328a946f",
"type": "main",
"index": 0
}
]
]
},
"e15b5c0d-5a46-4faa-828f-25e56cfce322": {
"main": [
[
{
"node": "8f6d9763-dece-45f6-a78b-1b5f6891f2fa",
"type": "main",
"index": 0
}
]
]
},
"f6e71b74-1ecb-43e8-baa2-bf05536d01b7": {
"main": [
[
{
"node": "f24dd98b-b3c4-47f1-8345-10097e53803d",
"type": "main",
"index": 0
}
]
]
},
"b354cde6-5354-4052-9a6c-d66c328a946f": {
"main": [
[
{
"node": "e15b5c0d-5a46-4faa-828f-25e56cfce322",
"type": "main",
"index": 0
}
]
]
}
}
}
Wie verwende ich diesen Workflow?
Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.
Für welche Szenarien ist dieser Workflow geeignet?
Fortgeschritten - Dokumentenextraktion
Ist es kostenpflichtig?
Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste (wie die PDF.co API), die im Workflow verwendet werden, möglicherweise kostenpflichtig sind.
Verwandte Workflows
Mauricio Perera
@rckflr
Automation consultant with over 10 years of experience specializing in AI, no-code, and workflow optimization. I’ve delivered tailored AI and NLP solutions across real estate, healthcare, and more, enhancing efficiency and customer experiences. Proficient in tools like Make, Airtable, and Zapier, I also integrate GPT models to create scalable, innovative automations. Contact me to discuss custom n8n workflows or advanced automations to streamline your processes.
Diesen Workflow teilen