Búsqueda académica en cinco bases de datos, con vectores PDF y múltiples exportaciones
Este es un flujo de automatización del dominio AI RAG, Multimodal AI que contiene 9 nodos. Utiliza principalmente nodos como Set, Code, PdfVector y WriteBinaryFile. Búsqueda de investigación académica en cinco bases de datos, con vectores PDF y exportaciones múltiples.
- No hay requisitos previos especiales; puede importarlo y usarlo directamente.
Nodos utilizados (9)
Categoría
{
"meta": {
"instanceId": "placeholder"
},
"nodes": [
{
"id": "search-info",
"name": "Configuración de búsqueda",
"type": "n8n-nodes-base.stickyNote",
"position": [
250,
150
],
"parameters": {
"content": "## Multi-Database Search\n\nSearches:\n- PubMed\n- ArXiv\n- Google Scholar\n- Semantic Scholar\n- ERIC\n\nDeduplicates and ranks results"
},
"typeVersion": 1
},
{
"id": "search-params",
"name": "Establecer parámetros de búsqueda",
"type": "n8n-nodes-base.set",
"position": [
450,
300
],
"parameters": {
"values": {
"number": [
{
"name": "yearFrom",
"value": 2020
},
{
"name": "resultsPerSource",
"value": 25
}
],
"string": [
{
"name": "searchQuery",
"value": "machine learning healthcare applications"
}
]
}
},
"typeVersion": 1
},
{
"id": "pdfvector-search",
"name": "PDF Vector - Búsqueda multi-BD",
"type": "n8n-nodes-pdfvector.pdfVector",
"position": [
650,
300
],
"parameters": {
"limit": "={{ $json.resultsPerSource }}",
"query": "={{ $json.searchQuery }}",
"fields": [
"title",
"authors",
"year",
"doi",
"abstract",
"totalCitations",
"pdfUrl",
"provider"
],
"resource": "academic",
"yearFrom": "={{ $json.yearFrom }}",
"operation": "search",
"providers": [
"pubmed",
"semantic_scholar",
"arxiv",
"google_scholar",
"eric"
]
},
"typeVersion": 1
},
{
"id": "deduplicate",
"name": "Eliminar duplicados",
"type": "n8n-nodes-base.code",
"position": [
850,
300
],
"parameters": {
"functionCode": "// Deduplicate papers based on DOI and title similarity\nconst papers = $json;\nconst unique = new Map();\n\npapers.forEach(paper => {\n // First check DOI\n if (paper.doi && !unique.has(paper.doi)) {\n unique.set(paper.doi, paper);\n } else if (!paper.doi) {\n // For papers without DOI, check title similarity\n const normalizedTitle = paper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n let isDuplicate = false;\n \n for (const [key, existingPaper] of unique) {\n const existingTitle = existingPaper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n if (normalizedTitle === existingTitle) {\n isDuplicate = true;\n // Merge provider info\n if (!existingPaper.providers) existingPaper.providers = [existingPaper.provider];\n existingPaper.providers.push(paper.provider);\n break;\n }\n }\n \n if (!isDuplicate) {\n unique.set(normalizedTitle, paper);\n }\n }\n});\n\nreturn Array.from(unique.values());"
},
"typeVersion": 1
},
{
"id": "rank-results",
"name": "Clasificar por relevancia",
"type": "n8n-nodes-base.code",
"position": [
1050,
300
],
"parameters": {
"functionCode": "// Calculate relevance score\nconst papers = $json;\nconst query = $node['Establecer parámetros de búsqueda'].json.searchQuery.toLowerCase();\n\nconst scored = papers.map(paper => {\n let score = 0;\n \n // Title relevance\n const titleWords = paper.title.toLowerCase().split(' ');\n const queryWords = query.split(' ');\n queryWords.forEach(word => {\n if (titleWords.includes(word)) score += 10;\n });\n \n // Citation impact\n score += Math.log(paper.totalCitations + 1) * 5;\n \n // Recency bonus\n const yearDiff = new Date().getFullYear() - paper.year;\n score += Math.max(0, 10 - yearDiff);\n \n // Full text availability\n if (paper.pdfUrl) score += 15;\n \n return { ...paper, relevanceScore: score };\n});\n\n// Sort by relevance\nreturn scored.sort((a, b) => b.relevanceScore - a.relevanceScore);"
},
"typeVersion": 1
},
{
"id": "generate-bibtex",
"name": "Generar BibTeX",
"type": "n8n-nodes-base.code",
"position": [
1250,
250
],
"parameters": {
"functionCode": "// Generate BibTeX entries\nconst papers = $json;\n\nconst bibtex = papers.map((paper, index) => {\n const key = paper.doi ? paper.doi.replace(/[^a-zA-Z0-9]/g, '') : `paper${index}`;\n const authors = paper.authors.join(' and ');\n \n return `@article{${key},\n title={${paper.title}},\n author={${authors}},\n year={${paper.year}},\n doi={${paper.doi || ''}},\n abstract={${paper.abstract || ''}}\n}`;\n}).join('\\n\\n');\n\nreturn { bibtex, papers };"
},
"typeVersion": 1
},
{
"id": "export-bibtex",
"name": "Exportar archivo BibTeX",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
250
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.bib",
"fileContent": "={{ $json.bibtex }}"
},
"typeVersion": 1
},
{
"id": "export-json",
"name": "Exportar JSON",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
350
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.json",
"fileContent": "={{ JSON.stringify($json.papers, null, 2) }}"
},
"typeVersion": 1
},
{
"id": "export-csv",
"name": "Exportar CSV",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
450
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.csv",
"fileContent": "={{ $json.papers.map(p => [p.title, p.authors.join(';'), p.year, p.doi, p.totalCitations, p.pdfUrl].join(',\t')).join('\\n') }}"
},
"typeVersion": 1
}
],
"connections": {
"generate-bibtex": {
"main": [
[
{
"node": "export-bibtex",
"type": "main",
"index": 0
},
{
"node": "export-json",
"type": "main",
"index": 0
},
{
"node": "export-csv",
"type": "main",
"index": 0
}
]
]
},
"rank-results": {
"main": [
[
{
"node": "generate-bibtex",
"type": "main",
"index": 0
}
]
]
},
"deduplicate": {
"main": [
[
{
"node": "rank-results",
"type": "main",
"index": 0
}
]
]
},
"search-params": {
"main": [
[
{
"node": "pdfvector-search",
"type": "main",
"index": 0
}
]
]
},
"pdfvector-search": {
"main": [
[
{
"node": "deduplicate",
"type": "main",
"index": 0
}
]
]
}
}
}
¿Cómo usar este flujo de trabajo?
Copie el código de configuración JSON de arriba, cree un nuevo flujo de trabajo en su instancia de n8n y seleccione "Importar desde JSON", pegue la configuración y luego modifique la configuración de credenciales según sea necesario.
¿En qué escenarios es adecuado este flujo de trabajo?
Intermedio - RAG de IA, IA Multimodal
¿Es de pago?
Este flujo de trabajo es completamente gratuito; puede importarlo y usarlo directamente. Sin embargo, tenga en cuenta que los servicios de terceros utilizados en el flujo de trabajo (como la API de PDF Vector) pueden requerir un pago por su cuenta.
Flujos de trabajo relacionados recomendados
PDF Vector
@pdfvector — Fully featured PDF APIs for developers: parse any PDF or Word document, extract structured data, and access millions of academic papers, all through simple APIs.
Compartir este flujo de trabajo