Akademische Recherche über fünf Datenbanken hinweg mit PDF Vector und Export in mehreren Formaten
Dies ist ein Automatisierungsworkflow mit 9 Nodes aus dem Bereich KI-RAG und multimodale KI. Hauptsächlich werden die Nodes Set, Code, PdfVector und WriteBinaryFile sowie weitere Nodes verwendet. Der Workflow führt eine wissenschaftliche Recherche in fünf Datenbanken mit PDF Vector durch und exportiert die Ergebnisse in mehreren Formaten.
- Keine besonderen Voraussetzungen, sofort nach Import nutzbar
Verwendete Nodes (9)
Kategorie
{
"meta": {
"instanceId": "placeholder"
},
"nodes": [
{
"id": "search-info",
"name": "Suchkonfiguration",
"type": "n8n-nodes-base.stickyNote",
"position": [
250,
150
],
"parameters": {
"content": "## Multi-Database Search\n\nSearches:\n- PubMed\n- ArXiv\n- Google Scholar\n- Semantic Scholar\n- ERIC\n\nDeduplicates and ranks results"
},
"typeVersion": 1
},
{
"id": "search-params",
"name": "Suchparameter festlegen",
"type": "n8n-nodes-base.set",
"position": [
450,
300
],
"parameters": {
"values": {
"number": [
{
"name": "yearFrom",
"value": 2020
},
{
"name": "resultsPerSource",
"value": 25
}
],
"string": [
{
"name": "searchQuery",
"value": "machine learning healthcare applications"
}
]
}
},
"typeVersion": 1
},
{
"id": "pdfvector-search",
"name": "PDF Vector - Multi-DB-Suche",
"type": "n8n-nodes-pdfvector.pdfVector",
"position": [
650,
300
],
"parameters": {
"limit": "={{ $json.resultsPerSource }}",
"query": "={{ $json.searchQuery }}",
"fields": [
"title",
"authors",
"year",
"doi",
"abstract",
"totalCitations",
"pdfUrl",
"provider"
],
"resource": "academic",
"yearFrom": "={{ $json.yearFrom }}",
"operation": "search",
"providers": [
"pubmed",
"semantic_scholar",
"arxiv",
"google_scholar",
"eric"
]
},
"typeVersion": 1
},
{
"id": "deduplicate",
"name": "Ergebnisse deduplizieren",
"type": "n8n-nodes-base.code",
"position": [
850,
300
],
"parameters": {
"functionCode": "// Deduplicate papers based on DOI and title similarity\nconst papers = $json;\nconst unique = new Map();\n\npapers.forEach(paper => {\n // First check DOI\n if (paper.doi && !unique.has(paper.doi)) {\n unique.set(paper.doi, paper);\n } else if (!paper.doi) {\n // For papers without DOI, check title similarity\n const normalizedTitle = paper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n let isDuplicate = false;\n \n for (const [key, existingPaper] of unique) {\n const existingTitle = existingPaper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n if (normalizedTitle === existingTitle) {\n isDuplicate = true;\n // Merge provider info\n if (!existingPaper.providers) existingPaper.providers = [existingPaper.provider];\n existingPaper.providers.push(paper.provider);\n break;\n }\n }\n \n if (!isDuplicate) {\n unique.set(normalizedTitle, paper);\n }\n }\n});\n\nreturn Array.from(unique.values());"
},
"typeVersion": 1
},
{
"id": "rank-results",
"name": "Nach Relevanz sortieren",
"type": "n8n-nodes-base.code",
"position": [
1050,
300
],
"parameters": {
"functionCode": "// Calculate relevance score\nconst papers = $json;\nconst query = $node['Set Search Parameters'].json.searchQuery.toLowerCase();\n\nconst scored = papers.map(paper => {\n let score = 0;\n \n // Title relevance\n const titleWords = paper.title.toLowerCase().split(' ');\n const queryWords = query.split(' ');\n queryWords.forEach(word => {\n if (titleWords.includes(word)) score += 10;\n });\n \n // Citation impact\n score += Math.log(paper.totalCitations + 1) * 5;\n \n // Recency bonus\n const yearDiff = new Date().getFullYear() - paper.year;\n score += Math.max(0, 10 - yearDiff);\n \n // Full text availability\n if (paper.pdfUrl) score += 15;\n \n return { ...paper, relevanceScore: score };\n});\n\n// Sort by relevance\nreturn scored.sort((a, b) => b.relevanceScore - a.relevanceScore);"
},
"typeVersion": 1
},
{
"id": "generate-bibtex",
"name": "BibTeX generieren",
"type": "n8n-nodes-base.code",
"position": [
1250,
250
],
"parameters": {
"functionCode": "// Generate BibTeX entries\nconst papers = $json;\n\nconst bibtex = papers.map((paper, index) => {\n const key = paper.doi ? paper.doi.replace(/[^a-zA-Z0-9]/g, '') : `paper${index}`;\n const authors = paper.authors.join(' and ');\n \n return `@article{${key},\n title={${paper.title}},\n author={${authors}},\n year={${paper.year}},\n doi={${paper.doi || ''}},\n abstract={${paper.abstract || ''}}\n}`;\n}).join('\\n\\n');\n\nreturn { bibtex, papers };"
},
"typeVersion": 1
},
{
"id": "export-bibtex",
"name": "BibTeX-Datei exportieren",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
250
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.bib",
"fileContent": "={{ $json.bibtex }}"
},
"typeVersion": 1
},
{
"id": "export-json",
"name": "Export JSON",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
350
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.json",
"fileContent": "={{ JSON.stringify($json.papers, null, 2) }}"
},
"typeVersion": 1
},
{
"id": "export-csv",
"name": "Export CSV",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
450
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.csv",
"fileContent": "={{ $json.papers.map(p => [p.title, p.authors.join(';'), p.year, p.doi, p.totalCitations, p.pdfUrl].join(',\t')).join('\\n') }}"
},
"typeVersion": 1
}
],
"connections": {
"generate-bibtex": {
"main": [
[
{
"node": "export-bibtex",
"type": "main",
"index": 0
},
{
"node": "export-json",
"type": "main",
"index": 0
},
{
"node": "export-csv",
"type": "main",
"index": 0
}
]
]
},
"rank-results": {
"main": [
[
{
"node": "generate-bibtex",
"type": "main",
"index": 0
}
]
]
},
"deduplicate": {
"main": [
[
{
"node": "rank-results",
"type": "main",
"index": 0
}
]
]
},
"search-params": {
"main": [
[
{
"node": "pdfvector-search",
"type": "main",
"index": 0
}
]
]
},
"pdfvector-search": {
"main": [
[
{
"node": "deduplicate",
"type": "main",
"index": 0
}
]
]
}
}
}
Wie verwende ich diesen Workflow?
Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.
Für welche Szenarien ist dieser Workflow geeignet?
Fortgeschritten – KI-RAG, multimodale KI
Ist es kostenpflichtig?
Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste (wie die PDF Vector API), die im Workflow verwendet werden, möglicherweise kostenpflichtig sind.
Verwandte Workflows
PDF Vector
@pdfvector – Fully featured PDF APIs for developers: parse any PDF or Word document, extract structured data, and access millions of academic papers – all through simple APIs.
Diesen Workflow teilen