Rappel d'e-mails sur les tendances IA - Weaviate
Ceci est un workflow d'automatisation du domaine Content Creation, AI RAG contenant 48 nœuds. Utilise principalement des nœuds comme Set, Xml, Merge, DateTime, Markdown. Construire un alerteur hebdomadaire sur les tendances de l'IA basé sur arXiv et Weaviate.
- Peut nécessiter les informations d'identification d'authentification de l'API cible
- Clé API OpenAI
Nœuds utilisés (48)
Catégorie
{
"id": "sgcKe5gsmJFdEAe3",
"meta": {
"instanceId": "be3e0177f1eeda5879f300082f54531dfa9819a5d7441e94ea64b32f8b1fd49c",
"templateCredsSetupCompleted": true
},
"name": "ai-trend-email-alerter-weaviate",
"tags": [],
"nodes": [
{
"id": "d36fdcbc-4add-46b5-a440-af07d1b81d56",
"name": "Weaviate Stockage vectoriel",
"type": "@n8n/n8n-nodes-langchain.vectorStoreWeaviate",
"position": [
3440,
752
],
"parameters": {
"mode": "insert",
"options": {
"textKey": "summary"
},
"weaviateCollection": {
"__rl": true,
"mode": "id",
"value": "ArxivArticles"
}
},
"credentials": {
"weaviateApi": {
"id": "qiTSL6FfsPCZLyUv",
"name": "Weaviate Credentials account"
}
},
"typeVersion": 1.2
},
{
"id": "04f15082-cb14-46b2-8e92-e2926e7de128",
"name": "Default Data Loader",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
3440,
1072
],
"parameters": {
"options": {
"metadata": {
"metadataValues": [
{
"name": "arxiv_id",
"value": "={{ $('Remove Fields').item.json.id }}"
},
{
"name": "published",
"value": "={{ $('Remove Fields').item.json.published }}"
},
{
"name": "author",
"value": "={{ $('Remove Fields').item.json.author }}"
},
{
"name": "title",
"value": "={{ $('Remove Fields').item.json.title }}"
},
{
"name": "category",
"value": "={{ $('Remove Fields').item.json.category }}"
},
{
"name": "primary_topic",
"value": "={{ $('Remove Fields').item.json.primary_topic }}"
},
{
"name": "secondary_topics",
"value": "={{ $('Remove Fields').item.json.secondary_topics }}"
},
{
"name": "potential_impact",
"value": "={{ $('Remove Fields').item.json.potential_impact }}"
}
]
}
},
"jsonData": "={{ $('Remove Fields').item.json.summary }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "ff6e5918-4b72-4902-828b-aa6e39fdd378",
"name": "Query arXiv",
"type": "n8n-nodes-base.httpRequest",
"position": [
528,
752
],
"parameters": {
"url": "=https://export.arxiv.org/api/query?search_query=cat:cs.LG+OR+cat:stat.ML&sortBy=submittedDate&sortOrder=descending&start=0&max_results=200&last_update_date_from={{ $('Date et heure').item.json.startDate.toDateTime().toFormat(\"yyyyMMdd\") }}\n",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "33c6d11a-6b27-4297-8fe5-08260f474871",
"name": "Convert XML to JSON",
"type": "n8n-nodes-base.xml",
"position": [
832,
752
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "fe135a0c-1976-444f-bd65-9d26d6e969ef",
"name": "Supprimer les doublons",
"type": "n8n-nodes-base.removeDuplicates",
"position": [
1424,
752
],
"parameters": {
"compare": "={{ $json.id }}",
"options": {}
},
"typeVersion": 2
},
{
"id": "1082f1b2-94de-4ea5-83a6-83242d065233",
"name": "Simple Mémoire",
"type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
"position": [
976,
2368
],
"parameters": {
"sessionKey": "sessionId",
"sessionIdType": "customKey"
},
"typeVersion": 1.3
},
{
"id": "780950ec-78b6-4c84-814c-03c3c6881b2c",
"name": "Incorporations OpenAI",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
3328,
1072
],
"parameters": {
"options": {}
},
"credentials": {
"openAiApi": {
"id": "v6dOwJXW6XXHxHQw",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "119e0896-430f-4019-8fe6-8677cd0e9289",
"name": "OpenRouter Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
"position": [
816,
2368
],
"parameters": {
"model": "anthropic/claude-3.7-sonnet",
"options": {
"temperature": 2
}
},
"credentials": {
"openRouterApi": {
"id": "V30JHJHSU29vq3Zj",
"name": "OpenRouter account"
}
},
"typeVersion": 1
},
{
"id": "0270ad87-f27a-491a-bb95-7983cb1b4d80",
"name": "Weaviate Stockage vectoriel1",
"type": "@n8n/n8n-nodes-langchain.vectorStoreWeaviate",
"position": [
80,
2288
],
"parameters": {
"mode": "retrieve-as-tool",
"options": {},
"toolName": "ArxivPapers",
"toolDescription": "This tool allows you to query the Weaviate Vector Store1 to retrieve arXiv article titles, summary and other metadata to be used as the sole data source performing a trend analysis. You must query the database to get information for the trend analysis.",
"weaviateCollection": {
"__rl": true,
"mode": "id",
"value": "ArxivArticles"
}
},
"credentials": {
"weaviateApi": {
"id": "qiTSL6FfsPCZLyUv",
"name": "Weaviate Credentials account"
}
},
"typeVersion": 1.2
},
{
"id": "780c692e-3444-4293-aeee-70241d151011",
"name": "Incorporations OpenAI1",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
64,
2432
],
"parameters": {
"options": {}
},
"credentials": {
"openAiApi": {
"id": "v6dOwJXW6XXHxHQw",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "798c5bdc-f46a-45d2-8692-8875efd9cb6e",
"name": "Date et heure",
"type": "n8n-nodes-base.dateTime",
"position": [
240,
752
],
"parameters": {
"options": {},
"duration": 7,
"magnitude": "={{ $json.currentDate }}",
"operation": "subtractFromDate",
"outputFieldName": "startDate"
},
"typeVersion": 2
},
{
"id": "56d6ff4b-3ae0-4450-b899-9773e83ce896",
"name": "Markdown",
"type": "n8n-nodes-base.markdown",
"position": [
1088,
1584
],
"parameters": {
"mode": "markdownToHtml",
"options": {
"simplifiedAutoLink": true
},
"markdown": "={{ $json.output.body }}",
"destinationKey": "=data"
},
"typeVersion": 1
},
{
"id": "d62c633e-d6d2-4d0e-9645-a909ae4a0efa",
"name": "Recursive Character Text Splitter1",
"type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
"position": [
3552,
1184
],
"parameters": {
"options": {},
"chunkSize": 2000
},
"typeVersion": 1
},
{
"id": "c2040ff2-808d-4f6e-8682-06f2aad3cf8b",
"name": "Structured Output Parser",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
1136,
2368
],
"parameters": {
"jsonSchemaExample": "{\n \"subject\":\"...\",\n \"body\":\"...\"\n}"
},
"typeVersion": 1.2
},
{
"id": "5d07a9fd-ff2a-4536-aa13-171e4eeeac42",
"name": "Déclencheur planifié",
"type": "n8n-nodes-base.scheduleTrigger",
"position": [
-128,
752
],
"parameters": {
"rule": {
"interval": [
{
"daysInterval": 7
},
{}
]
}
},
"typeVersion": 1.2
},
{
"id": "913e8a25-32af-4c2c-943c-58ab2563accd",
"name": "Note adhésive7",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
560
],
"parameters": {
"color": 5,
"width": 380,
"height": 340,
"content": "## 1. Specify date range for weekly automation\n1. Calculate today's date\n2. Calculate dates for the last week based on today"
},
"typeVersion": 1
},
{
"id": "704752b7-75cf-40d8-9b31-fc8e43ab18b2",
"name": "Note adhésive8",
"type": "n8n-nodes-base.stickyNote",
"position": [
400,
560
],
"parameters": {
"color": 5,
"width": 360,
"height": 452,
"content": "## 2. Fetch weekly articles from arXiv.\nFetch ML article abstracts by querying the free arXiv API.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nThe number of maximum papers returned is set by default to 200. You can change this by editing `max_results` in the query node."
},
"typeVersion": 1
},
{
"id": "9529c98c-a28c-4512-a076-9e304d82125a",
"name": "Note adhésive9",
"type": "n8n-nodes-base.stickyNote",
"position": [
784,
560
],
"parameters": {
"color": 5,
"width": 820,
"height": 420,
"content": "## 3. Pre-process data\n1. Convert XML response to JSON.\n2. Split results by article ID.\n3. Format data for Weaviate.\n4. Remove any duplicates, if they exist.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSpecifically, we are merging fields that have multiple entries, like `author`, and `category` into an Array of strings and adding datetime formatting to the publication date."
},
"typeVersion": 1
},
{
"id": "57c8762e-5afd-4a9c-b3d0-61fbd863bc36",
"name": "Note adhésive10",
"type": "n8n-nodes-base.stickyNote",
"position": [
3104,
560
],
"parameters": {
"color": 5,
"width": 840,
"height": 380,
"content": "## 6. Create a new Weaviate collection in existing cloud cluster\n1. Connect to your Weaviate Cloud or local credentials.\n2. Set `Operation Mode` to `Insert Documents`.\n3. To insert new data into a **new** `Weaviate Collection`, select `By ID` and give your collection a name in PascalCase format.\n4. To insert data into an **existing** `Weaviate Collection`, select `From List` and choose your existing collection from the drop down menu.\n\n\n \n\n\n\n\n\n\n\n\n\n5. Under `Options`, click `Add Option` and select `Text Key`. This is the field for which embeddings will be generated. In this example, we are embedding the `summary` field in our data, as this is the abstract text for the articles."
},
"typeVersion": 1
},
{
"id": "b0e70958-64a7-4b5f-ae7e-e6350757eb49",
"name": "Note adhésive11",
"type": "n8n-nodes-base.stickyNote",
"position": [
3264,
960
],
"parameters": {
"color": 5,
"width": 560,
"height": 580,
"content": "## 7. Configure components for embeddings\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n__Embeddings Node__\n1. Select your embedding provider and model, then connect it to your credentials.\n \n__Data Loader__\n2. The `Type of Data` we're loading is `JSON`.\n3. If you are including metadata (we are), set `Mode` to `Load Specific Data`.\n4. The `Data` field represents the data we want to generate embeddings for.\n5. Under `Options` we define our metadata schema.\n\n__Text Splitter__\n6. We're using the Recursive Character Text Splitter and setting our `Chunk Size` to `2000` to get one chunk per abstract (but feel free to experiment!)"
},
"typeVersion": 1
},
{
"id": "3b505c27-caa0-4f25-8121-90d39b9a63b3",
"name": "Note adhésive12",
"type": "n8n-nodes-base.stickyNote",
"position": [
3968,
560
],
"parameters": {
"color": 5,
"width": 700,
"height": 340,
"content": "## 8. Confirm that articles have been uploaded.\nThese steps serve as verification that the new weekly articles have been successfully uploaded into Weaviate before proceeding to run the AI Agent node. The list of articles will generate a static `sessionId` that serves as that verification.\n1. Get a list of all the arXiv IDs just uploaded into Weaviate.\n2. Generate a static `sessionId` that will serve as a trigger for the AI Agent node."
},
"typeVersion": 1
},
{
"id": "64098a08-06fd-4981-9d46-6efe61e01f81",
"name": "Note adhésive13",
"type": "n8n-nodes-base.stickyNote",
"position": [
32,
1392
],
"parameters": {
"color": 6,
"width": 660,
"height": 500,
"content": "## 1. Configure the AI Agent node with Weaviate as a _TOOL_\n1. Set `Source for Prompt (User Message)` to `Define below`.\n2. In `Prompt (User Message)`, give a short explanation of the retrieval and analysis tasks to be performed.\n3. Under `Options`, click `Add Option` to add the `System Message`, also known as the system prompt. Provide specific instructions for the agent, including:\n\n\n\n\n\n\n\n\n\n\n\n\n\n* Instructions to query the Weaviate vector store\n* Explanation and schema of data in Weaviate\n* Instructions on how to determine trends\n* Specified output format\n* Example of real output\n\n"
},
"typeVersion": 1
},
{
"id": "24a428b0-8ee2-4e0d-9c36-c0934bcc13d3",
"name": "Note adhésive14",
"type": "n8n-nodes-base.stickyNote",
"position": [
32,
1920
],
"parameters": {
"color": 6,
"width": 500,
"height": 640,
"content": "## 3. Configure Weaviate Vector Store\n1. Select the same credentials (cloud or local) to connect with as earlier.\n2. Set `Operation Mode` to `Retrieve Documents as Tool for AI Agent`.\n3. Add a `Description` to the tool that tells the LLM how to use the Weaviate vector store. **This description is very important! A poor tool description can result in the agent not using the tool. A good description includes what the tool allows the agent to do and what the agent should do with the tool. You can be forceful in your tone here.**\n4. Connect to existing vector store by selecting `By ID` and type in the same collection name as above: `ArxivArticles`.\n5. Turn on `Include Metadata`, as publication dates, titles, and arXiv URLs are all critical to the agent's response.\n6. Select OpenAI as the embedding provider for the query."
},
"typeVersion": 1
},
{
"id": "3af4b9b8-5929-4bff-af3f-7a200d29ea34",
"name": "Note adhésive15",
"type": "n8n-nodes-base.stickyNote",
"position": [
544,
1920
],
"parameters": {
"color": 6,
"width": 744,
"height": 640,
"content": "## 4. Add model, memory, and output parser\n1. We're using the OpenRouter Chat Model node so that we can use Claude 3.7 Sonnet (`anthropic/claude-3.7-sonnet`) for the agent.\n2. Add simple (short-term) memory to the agent, setting `Session ID` to `Define Below` and `Key` to `sessionId` (which is our static session ID because we are only using short-term memory and not storing the agent output for future use).\n3. Add the `Structured Output Parser` node and set `Schema Type` to `Generate from JSON Example`.\n4. In `JSON Example`, we define the format of the output for the agent, which will be a subject line and body text for the resulting summary email we want to generate:\n\n```\n{\n \"subject\": \"...\",\n \"body\": \"...\"\n}\n```"
},
"typeVersion": 1
},
{
"id": "bc738a22-4e2d-4710-88ee-1186bb29b759",
"name": "Note adhésive16",
"type": "n8n-nodes-base.stickyNote",
"position": [
704,
1392
],
"parameters": {
"color": 6,
"width": 600,
"height": 400,
"content": "## 2. Post-process agent response\n1. Use the `Edit Fields` node to add an expression that replaces \"\\n\\n\" in the agent's response with actual page breaks (so that it can be properly parsed as markdown text).\n2. Add the `Markdown to HTML` node to convert the body text of the email to HTML. \n3. We give the output a name in `Destination Key`.\n4. Add the option for `Automatic Linking to URLs`."
},
"typeVersion": 1
},
{
"id": "afc29ee4-c944-4c49-abef-6356a84975f9",
"name": "Note adhésive17",
"type": "n8n-nodes-base.stickyNote",
"position": [
1328,
1392
],
"parameters": {
"color": 6,
"width": 520,
"height": 400,
"content": "## 5. Send the output as an email!\n1. Select your `SMTP Account` credential.\n2. Set the `Subject` equal to the subject key `{{ $json.subject }}`.\n3. Set `Email Format` to `HTML`.\n4. Set `HTML` to `{{ $json.data }}` (the post-processed body text)."
},
"typeVersion": 1
},
{
"id": "fc89c7eb-a1b2-4692-aed4-b075cbb6793b",
"name": "Note adhésive18",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
400
],
"parameters": {
"color": 5,
"width": 1600,
"height": 140,
"content": "# Part 1: Fetch, clean, enrich and insert arXiv abstracts into Weaviate\nIn the first part of this workflow, we activate a `Schedule Trigger` to fetch AI and ML abstracts (along with their metadata like article title, authors, publication date, etc.) on a weekly basis. We clean the data and then enrich it with an LLM that will label it with topic categories and predict each article's potential impact in the field. Finally, we upload the enriched and cleaned data into a Weaviate collection and verify that the data has been uploaded."
},
"typeVersion": 1
},
{
"id": "1f244dc1-f564-4a01-b582-49484c3e7d63",
"name": "Note adhésive19",
"type": "n8n-nodes-base.stickyNote",
"position": [
32,
1232
],
"parameters": {
"color": 6,
"width": 1580,
"height": 140,
"content": "# Part 2: Use agentic RAG to identify research trends and send them in an email\nIn this part of the workflow, we configure an AI Agent node to work with Weaviate as a tool. The entire collection of embedded article abstracts, along with their metadata, is at the disposal of the agent. We instruct the agent to use Weaviate as a tool, describe available input data, and give directions for how to identify trends and structure the summary email. We clean up the data and then send it off via email."
},
"typeVersion": 1
},
{
"id": "75d64ce1-57c6-4b0f-80e4-66dbe8488397",
"name": "OpenRouter Chat Model1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
"position": [
1952,
1088
],
"parameters": {
"model": "anthropic/claude-3.7-sonnet",
"options": {}
},
"credentials": {
"openRouterApi": {
"id": "V30JHJHSU29vq3Zj",
"name": "OpenRouter account"
}
},
"typeVersion": 1
},
{
"id": "901c7f80-f856-4d90-8046-8b6fe2e45056",
"name": "Structured Output Parser1",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
2224,
1088
],
"parameters": {
"autoFix": true,
"jsonSchemaExample": "{\n\t\"primary_category\": \"LLM Fine-tuning\",\n\t\"secondary_categories\": [\"Parameter-Efficient Fine-tuning (PEFT)\", \"Data-centric AI\"],\n \"potential_impact\": 1\n}"
},
"typeVersion": 1.3
},
{
"id": "dbea7aee-5dae-457e-8b5c-343d18188fb0",
"name": "Fusionner",
"type": "n8n-nodes-base.merge",
"position": [
2704,
752
],
"parameters": {
"mode": "combine",
"options": {},
"combineBy": "combineByPosition"
},
"typeVersion": 3.2
},
{
"id": "5df76f3f-0a71-4999-a85f-375c680fe073",
"name": "OpenRouter Chat Model2",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
"position": [
2288,
1232
],
"parameters": {
"model": "anthropic/claude-3.7-sonnet",
"options": {}
},
"credentials": {
"openRouterApi": {
"id": "V30JHJHSU29vq3Zj",
"name": "OpenRouter account"
}
},
"typeVersion": 1
},
{
"id": "0bfa6914-5450-494a-9c1b-cccd58ae4d4c",
"name": "Note adhésive",
"type": "n8n-nodes-base.stickyNote",
"position": [
1632,
560
],
"parameters": {
"color": 5,
"width": 980,
"height": 460,
"content": "## 4. Enrich arXiv articles with topic classifications and potential impact predictions \n1. Set `Source for Prompt (User Message)` to `Define below`.\n2. In `Prompt (User Message)`, give a short explanation of the classification task and point the agent towards the article title and abstract fields.\n3. Under `Options`, click `Add Option` to add the `System Message`, also known as the system prompt. Enter specific instructions for making the classifications, including defining our categories:\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n * `primary_category`: (string) The single most relevant primary category for the paper out of a list of pre-defined topics.\n * `secondary_categories`: (array of strings) Up to two additional relevant secondary categories.\n * `potential_impact`: (integer) An integer score from 1 to 5 representing how impactful the research conclusions may be in the field at large.\n* Make sure that `Require Specific Output Format` is enabled."
},
"typeVersion": 1
},
{
"id": "aab2d979-238c-450d-9d3f-0ff085f90730",
"name": "Note adhésive1",
"type": "n8n-nodes-base.stickyNote",
"position": [
1920,
1040
],
"parameters": {
"color": 5,
"width": 592,
"height": 380,
"content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nChoose your models for the agent and structured output parser (we use `claude-3.7-sonnet`)."
},
"typeVersion": 1
},
{
"id": "dade3fcb-63c5-4c34-8287-9270a7758960",
"name": "Note adhésive2",
"type": "n8n-nodes-base.stickyNote",
"position": [
2624,
560
],
"parameters": {
"color": 5,
"width": 460,
"height": 340,
"content": "## 5. Post-process enriched data\n1. Merge the output from AI agent with the existing article data.\n2. Get rid of the redundant `\"output\"` field in the JSON."
},
"typeVersion": 1
},
{
"id": "7e3b842d-aba8-41a8-95ec-94917017998d",
"name": "Get Current Date",
"type": "n8n-nodes-base.dateTime",
"position": [
48,
752
],
"parameters": {
"options": {}
},
"typeVersion": 2
},
{
"id": "1356f9bc-87eb-4f69-871c-0bdc1591e2c9",
"name": "Split Results",
"type": "n8n-nodes-base.splitOut",
"position": [
1024,
752
],
"parameters": {
"options": {},
"fieldToSplitOut": "feed.entry"
},
"typeVersion": 1
},
{
"id": "6f37e9bb-e5c0-493a-905d-87b55b874f49",
"name": "Prep Data for Weaviate",
"type": "n8n-nodes-base.set",
"position": [
1232,
752
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "428846a4-8555-48cc-aded-bb0beb5fb123",
"name": "id",
"type": "string",
"value": "={{ $json.id }}"
},
{
"id": "40614d42-547f-48d6-8fde-4d94b7a4963c",
"name": "title",
"type": "string",
"value": "={{ $json.title }}"
},
{
"id": "1bcd0610-3a4a-4ee8-aa78-673319282cf5",
"name": "summary",
"type": "string",
"value": "={{ $json.summary }}"
},
{
"id": "d49b5557-9379-4132-b2c4-cf155aca7428",
"name": "author",
"type": "string",
"value": "={{\n Array.isArray($json.author)\n ? $json.author.map(author => author.name)\n : ($json.author && $json.author.name ? [$json.author.name] : [])\n}}"
},
{
"id": "ed8d5a76-bc40-4fa7-9f19-61f2a3429f1d",
"name": "published",
"type": "string",
"value": "={{ new Date($json.published).toISOString() }}"
},
{
"id": "6e6fbfb5-d2bb-474f-9885-d65438c8b271",
"name": "category",
"type": "string",
"value": "={{ Array.isArray($json.category) ? $json.category.map(category => category.term) : ($json.category && $json.category.term ? [$json.category.term] : []) }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "69af1ed4-0a73-4179-aff2-d78253e7921a",
"name": "Enrich Articles with Topic Classification",
"type": "@n8n/n8n-nodes-langchain.agent",
"onError": "continueErrorOutput",
"position": [
1984,
752
],
"parameters": {
"text": "=Classify the following arXiv papers:\n\n```json\nTitle: {{ $json.title }}\nAbstract: {{ $json.summary }}",
"options": {
"systemMessage": "=You are an expert AI agent designed to classify academic research papers. Your task is to analyze the provided arXiv paper data and categorize it based on its content.\n\nInput Data Schema:\nThe input data will be a JSON object with the following structure:\n\n{\n \"title\": \"string\",\n \"summary\": \"string\"\n}\n\nYou MUST respond with a JSON object containing the following fields:\n\n\"primary_category\": (string) The single most relevant primary category for the paper. You MUST choose one category from the following predefined list. Do NOT use any category not on this list.\n\n\"secondary_categories\": (array of strings) Up to two additional relevant secondary categories. These are optional; if no secondary categories apply, provide an empty array []. If chosen, they MUST also be from the predefined list.\n\n\"potential_impact\": (integer) An integer score from 1 to 5, judging the paper's potential impact based on these criteria:\n\n1: Papers with no new existing information or limited results.\n\n2: Papers with minor incremental contributions or limited novelty.\n\n3: Papers with solid contributions, good results, and clear utility, but not groundbreaking.\n\n4: Papers with significant advancements, novel approaches, or strong potential to influence the field.\n\n5: Papers that are potential game-changers, representing paradigm shifts, or opening entirely new research directions.\n\nPredefined Categories and Definitions:\n\nFoundation Models: Models trained on broad data at scale, designed to be adaptable to a wide range of downstream tasks (e.g., large language models, large vision models, multi-modal models).\n\nLLM Fine-tuning: Techniques and methodologies for adapting pre-trained Large Language Models (LLMs) to specific tasks or datasets.\n\nParameter-Efficient Fine-tuning (PEFT): Methods that enable efficient adaptation of large pre-trained models to new tasks with minimal computational cost, by updating only a small subset of 
parameters (e.g., LoRA, Prompt Tuning).\n\nRetrieval-Augmented Generation (RAG): Architectures or systems that combine generative models (like LLMs) with information retrieval mechanisms to enhance the factual accuracy and relevance of generated outputs by referencing external knowledge bases.\n\nModel Quantization: Techniques for reducing the precision of model parameters (e.g., from float32 to int8) to decrease model size, memory footprint, and computational requirements, often for efficient deployment on edge devices.\n\nAgentic AI / AI Agents: Systems designed for autonomous decision-making, planning, and action in dynamic environments, often involving reasoning, memory, and tool use.\n\nMultimodality: Models capable of processing, understanding, and generating content across multiple data types or modalities (e.g., text and images, audio and video).\n\nReinforcement Learning: A paradigm where an agent learns to make decisions by performing actions in an environment to maximize a cumulative reward, often through trial and error.\n\nComputer Vision (Specific Techniques): Papers focusing on particular computer vision tasks or methodologies that are not primarily about foundation models (e.g., 3D reconstruction, object detection, image segmentation, pose estimation).\n\nNatural Language Processing (Specific Techniques): Papers focusing on particular NLP tasks or methodologies that are not primarily about foundation models or LLM fine-tuning (e.g., text summarization, machine translation, sentiment analysis, named entity recognition).\n\nEthical AI / AI Safety: Research addressing the societal implications of AI, including fairness, bias detection and mitigation, interpretability, transparency, privacy, and alignment with human values.\n\nEfficient AI / AI Optimization: Techniques aimed at improving the computational efficiency, speed, or resource usage of AI models beyond just quantization, including architecture search, inference optimization, and hardware-aware 
design.\n\nData-centric AI: Approaches that prioritize improving the quality, quantity, and organization of data used to train AI models, rather than solely focusing on model architecture improvements.\n\nOther: A catch-all category for articles that don't fall into one of the classes mentioned above.\n\nFocus solely on the content of the paper's title, summary, and categories to make your classification. Do NOT include any conversational text or explanations in your response, only the JSON object."
},
"promptType": "define"
},
"retryOnFail": true,
"typeVersion": 2
},
{
"id": "36068ba8-4e86-403c-b6a6-7fe55af58a95",
"name": "Remove Fields",
"type": "n8n-nodes-base.set",
"position": [
2912,
752
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "bd542bdd-d919-4688-a34d-395dd003e832",
"name": "id",
"type": "string",
"value": "={{ $json.id }}"
},
{
"id": "714d07a2-0c05-4d12-bba4-b37ca21c6521",
"name": "title",
"type": "string",
"value": "={{ $json.title }}"
},
{
"id": "ec2e4ccb-70ec-4d12-8d5b-8b5359b93315",
"name": "summary",
"type": "string",
"value": "={{ $json.summary }}"
},
{
"id": "acae59b4-d167-42e5-8f07-f799d2cefcdf",
"name": "author",
"type": "string",
"value": "={{ $json.author }}"
},
{
"id": "0b74d264-c314-4522-9e7e-37d6a13e4247",
"name": "published",
"type": "string",
"value": "={{ $json.published }}"
},
{
"id": "e6f4a0a9-8b6c-47ab-9abb-29d530c21f2c",
"name": "category",
"type": "string",
"value": "={{ $json.category }}"
},
{
"id": "a808ca6a-6847-4730-a0ed-2b1dbdb5ad8a",
"name": "primary_topic",
"type": "string",
"value": "={{ $json.output.primary_category }}"
},
{
"id": "4f82732d-4f67-4047-bd67-6c00dd1c9a80",
"name": "secondary_topics",
"type": "string",
"value": "={{ $json.output.secondary_categories }}"
},
{
"id": "322c2946-8a90-451a-b484-bc1da68fb178",
"name": "potential_impact",
"type": "number",
"value": "={{ parseFloat($json.output.potential_impact) }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "af99113b-8804-47bf-a74d-2265f63b97e1",
"name": "Agréger Uploaded arXiv IDs",
"type": "n8n-nodes-base.aggregate",
"position": [
4112,
752
],
"parameters": {
"options": {},
"fieldsToAggregate": {
"fieldToAggregate": [
{
"fieldToAggregate": "metadata.arxiv_id"
}
]
}
},
"typeVersion": 1
},
{
"id": "92511b85-b11d-4fd7-9985-326051db472a",
"name": "Add Static sessionId",
"type": "n8n-nodes-base.set",
"position": [
4400,
752
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "dea40507-a706-4792-b0b4-673d655ec877",
"name": "sessionId",
"type": "string",
"value": "static_id"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "f7e31525-1ac9-4adc-b4c6-9159e1b96e6e",
"name": "Agentic RAG for Trend Analysis",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
48,
1584
],
"parameters": {
"text": "=Please provide a summarized trend analysis using the articles stored in the Weaviate vector store.",
"options": {
"systemMessage": "=**CRITICAL: YOUR FINAL RESPONSE MUST CONTAIN ABSOLUTELY NO MARKDOWN CODE FENCES (```JSON) OR ANY OTHER TEXT OUTSIDE THE PURE JSON OBJECT. IMMEDIATELY PROVIDE THE JSON.**\n\nYou are an expert AI and Machine Learning research analyst, specifically tasked with synthesizing weekly trends from arXiv publications. Your goal is to generate a concise, approachable, and easy-to-read email summary for a general audience interested in the latest developments in AI and ML. You must use the Weaviate Vector Store1 and perform a proper vector search to retrieve the data. If you were unable to retrieve data, please repeatedly try again until you can. Do not hallucinate, do not make anything up, do not rely on your memory.\n\n**Input Data:**\nYou will receive an array of JSON objects, where each object represents a single AI/ML article published on arXiv in the last week. You MUST analyze this entire collection of articles to identify key trends and notable research. Each article object has the following fields:\n- `arxiv_id`: The FULL arXiv URL of the article. (e.g. \"http://arxiv.org/abs/2507.02863v1\")\n- `title`: The title of the article.\n- `main_findings`: The main findings of the paper.\n- `primary_topic`: The primary topic assigned to the article (e.g., \"Language Model Evaluation\").\n- `secondary_topics`: A list of secondary topics assigned to the article (e.g., [\"Generative AI\", \"Natural Language Processing\"]).\n- `paper_quality`: A numerical score from 1 (very poor) to 5 (excellent quality), representing the predicted research quality.\n- `potential_impact`: A numerical score from 1 (not important) to 5 (potential paradigm shift, huge game-changer), representing the predicted impact on the field.\n\nYou will also receive an array of JSONs with article counts by primary topic. Incorporate this into your trend analysis by focusing on topics with the largest number of publications. 
Use this to guide the rest of the analysis\n\n**Trend Identification and Selection Rules:**\n- **Get article counts by `primary_topic`:** First, perform an aggregate query in the Weaviate vector store to get the counts of articles published by `primary_topic`. Use these counts to inform the rest of your analysis, focusing on the most-published topics. \n- **Identify Key Trends:** Group papers by `primary_topic` and `secondary_topics`. Prioritize trends with a higher number of associated papers, or papers with higher `potential_impact` or `paper_quality` scores.\n- **Select Representative Papers for Citation:** For each trend you summarize, choose 1-2 representative papers. These should ideally be the highest `potential_impact` or `paper_quality` papers relevant to that trend from the provided input data.\n- **Infer Future Outlook:** Based on the collective summaries and topics of the provided articles, infer and summarize likely future research directions.\n\n**Output Format (Strict JSON):**\nYour entire response MUST be **only** a valid JSON object.\n- **DO NOT include any Markdown code fences (```json) or any other text before or after the JSON object.**\n- The JSON object MUST contain exactly two top-level keys: \"subject\" and \"body\".\n- The value for \"subject\" MUST be a single string. It MUST always start with \"✨ ML Weekly Update:\" and the rest should be a succinct summary of the report's top trends for the week.\n- The value for \"body\" MUST be a single string representing the full email content in Markdown format. Use `\\\\n` for newlines within the body string. All double quotes within string values MUST be escaped as `\\\"`.\n- You must cite all of your conclusions with the matching `arxiv_id` for the paper. 
Cite the in markdown format so that the user can click on a word related to the paper which is linked to the `arxiv_id`.\n\nHere is an example of a citation:\n...including [MOTIF](http://arxiv.org/abs/2507.02851v1) which enables modular thinking beyond context limitations\n\nUse ONLY data from the `arxiv_id` field for citations. Do NOT create an arXiv_id spontaneously.\n\nExample JSON Output (strictly adhere to this format):\n```json\n{\n \"subject\": \"✨ ML Weekly Update: BERT gets an upgrade, new PEFT techniques, and ML for radiology\",\n \"body\": \"Rest of email in markdown format here.\"\n}\n\nFollow this format strictly for the email, giving the body output in markdown format:\n\nHey there,\n\nHere's a quick rundown of the key trends in Machine Learning research from the past week.\n\n## 💫 Key Research Trends This Week\nA one-sentence summary of the main trends covered in the report.\n* Bullet 1: A specific trend explained in 1 sentence or less, with the corresponding `arxiv_id` from the input report.\n* Bullet 2: A specific trend explained in 1 sentence or less, with the corresponding `arxiv_id` from the input report.\n* Bullet 3: A specific trend explained in 1 sentence or less, with the corresponding `arxiv_id` from the input report.\n** Write a maximum of 3 bullet points **\n\n## 🔮 Future Research Directions\nA one-sentence summary of future research directions from the report.\n* Bullet 1: A specific prediction explained in 1 sentence or less.\n* Bullet 2: A specific prediction explained in 1 sentence or less.\n* Bullet 3: A specific prediction explained in 1 sentence or less.\n** Write a maximum of 3 bullet points **\n\nGive a one sentence summary of the email. Then follow it with some short tips on what to look for in terms of new developments over the coming week.\n\nUntil next week,\n\nArchi 🧑🏽🔬"
},
"promptType": "define"
},
"retryOnFail": true,
"typeVersion": 2
},
{
"id": "bb101516-818c-48e5-87d8-1a9f98d0f3a2",
"name": "Post Process Data",
"type": "n8n-nodes-base.set",
"position": [
832,
1584
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "42cf92c6-730d-4abf-a578-480f71e220da",
"name": "subject",
"type": "string",
"value": "={{ $json.output.subject }}"
},
{
"id": "c1f254e2-8333-431e-a6f1-0efa38f4fd3b",
"name": "output.body",
"type": "string",
"value": "={{ $json.output.body.replace(/\\\\n/g, '\\n') }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "f5f96c9f-08ad-4707-a24e-d3c518218778",
"name": "Send email",
"type": "n8n-nodes-base.emailSend",
"position": [
1520,
1584
],
"webhookId": "2fc3d54c-6de7-4f75-aa5d-f8cf90a4d1c4",
"parameters": {
"html": "={{ $json.data }}",
"options": {},
"subject": "={{ $json.subject }}"
},
"credentials": {
"smtp": {
"id": "isYLskFLHSByAySn",
"name": "SMTP account"
}
},
"typeVersion": 2.1
},
{
"id": "e61ea149-eae7-43bc-a308-f791ea408dec",
"name": "Note adhésive3",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
0
],
"parameters": {
"color": 4,
"width": 1600,
"height": 380,
"content": "# Build a Weekly AI Trend Alerter with arXiv and Weaviate\n\n🧑🏽🔬 Ditch the endless scroll for AI trends. Meet Archi, your personal AI trend scout that hits you up once a week with everyone you need to know.\n\nThis workflow scrapes AI and machine learning article abstracts from [arxiv](arxiv.org), enriches them with topic categories using a LLM, and embeds them in a [Weaviate](Weaviate) vector store. The vector store is then used as a tool for agentic RAG to write a concise, easy-to-read summary of the week in research.\n\n## Prerequisites\n1. **An existing Weaviate cluster.** You can view instructions for setting up a **local cluster** with Docker [here](https://weaviate.io/developers/weaviate/installation/docker-compose#starter-docker-compose-file) or a **Weaviate Cloud** cluster [here](https://weaviate.io/developers/wcs/quickstart).\n2. **API keys** to generate embeddings and power chat models. We use a combination of [OpenRouter](https://openrouter.ai/) and [OpenAI](https://openai.com/) Feel free to switch out the models as you like.\n3. **An email address with STMP privileges**. This is the address the email will come from. In this demo we use a personal Gmail address. You can create a new credential to link a `STMP Account` using these [instructions](https://docs.n8n.io/integrations/builtin/credentials/sendemail/).\n4. **Self-hosted n8n instance.** See this [video](https://www.youtube.com/watch?v=kq5bmrjPPAY&t=108s) for how to get set up in just three minutes.\n \n\n💚 Sign up [here](https://console.weaviate.cloud/?utm_source=recipe&utm_campaign=n8n&utm_content=n8n_arxiv_template) for a 14-day free trial of Weaviate Cloud (no credit card required)."
},
"typeVersion": 1
},
{
"id": "ef75d74a-233c-4928-b527-217ce7cc956f",
"name": "Modèle de chat OpenAI",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
656,
2368
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini",
"cachedResultName": "gpt-4.1-mini"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "v6dOwJXW6XXHxHQw",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "ea0c28e5-5fed-4c72-954a-dd36be744bdb",
"name": "Modèle de chat OpenAI1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
2112,
1088
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "v6dOwJXW6XXHxHQw",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "a8db38e2-89d4-4d15-adc1-e551dfe06e16",
"connections": {
"Merge": {
"main": [
[
{
"node": "36068ba8-4e86-403c-b6a6-7fe55af58a95",
"type": "main",
"index": 0
}
]
]
},
"56d6ff4b-3ae0-4450-b899-9773e83ce896": {
"main": [
[
{
"node": "f5f96c9f-08ad-4707-a24e-d3c518218778",
"type": "main",
"index": 0
}
]
]
},
"Date & Time": {
"main": [
[
{
"node": "ff6e5918-4b72-4902-828b-aa6e39fdd378",
"type": "main",
"index": 0
}
]
]
},
"ff6e5918-4b72-4902-828b-aa6e39fdd378": {
"main": [
[
{
"node": "33c6d11a-6b27-4297-8fe5-08260f474871",
"type": "main",
"index": 0
}
]
]
},
"36068ba8-4e86-403c-b6a6-7fe55af58a95": {
"main": [
[
{
"node": "Weaviate Vector Store",
"type": "main",
"index": 0
}
]
]
},
"Simple Memory": {
"ai_memory": [
[
{
"node": "f7e31525-1ac9-4adc-b4c6-9159e1b96e6e",
"type": "ai_memory",
"index": 0
}
]
]
},
"1356f9bc-87eb-4f69-871c-0bdc1591e2c9": {
"main": [
[
{
"node": "6f37e9bb-e5c0-493a-905d-87b55b874f49",
"type": "main",
"index": 0
}
]
]
},
"7e3b842d-aba8-41a8-95ec-94917017998d": {
"main": [
[
{
"node": "Date & Time",
"type": "main",
"index": 0
}
]
]
},
"Schedule Trigger": {
"main": [
[
{
"node": "7e3b842d-aba8-41a8-95ec-94917017998d",
"type": "main",
"index": 0
}
]
]
},
"Embeddings OpenAI": {
"ai_embedding": [
[
{
"node": "Weaviate Vector Store",
"type": "ai_embedding",
"index": 0
}
]
]
},
"OpenAI Chat Model": {
"ai_languageModel": [
[
{
"node": "f7e31525-1ac9-4adc-b4c6-9159e1b96e6e",
"type": "ai_languageModel",
"index": 1
}
]
]
},
"bb101516-818c-48e5-87d8-1a9f98d0f3a2": {
"main": [
[
{
"node": "56d6ff4b-3ae0-4450-b899-9773e83ce896",
"type": "main",
"index": 0
}
]
]
},
"Remove Duplicates": {
"main": [
[
{
"node": "69af1ed4-0a73-4179-aff2-d78253e7921a",
"type": "main",
"index": 0
},
{
"node": "Merge",
"type": "main",
"index": 0
}
]
]
},
"Embeddings OpenAI1": {
"ai_embedding": [
[
{
"node": "Weaviate Vector Store1",
"type": "ai_embedding",
"index": 0
}
]
]
},
"OpenAI Chat Model1": {
"ai_languageModel": [
[
{
"node": "69af1ed4-0a73-4179-aff2-d78253e7921a",
"type": "ai_languageModel",
"index": 1
}
]
]
},
"33c6d11a-6b27-4297-8fe5-08260f474871": {
"main": [
[
{
"node": "1356f9bc-87eb-4f69-871c-0bdc1591e2c9",
"type": "main",
"index": 0
}
]
]
},
"04f15082-cb14-46b2-8e92-e2926e7de128": {
"ai_document": [
[
{
"node": "Weaviate Vector Store",
"type": "ai_document",
"index": 0
}
]
]
},
"92511b85-b11d-4fd7-9985-326051db472a": {
"main": [
[
{
"node": "f7e31525-1ac9-4adc-b4c6-9159e1b96e6e",
"type": "main",
"index": 0
}
]
]
},
"119e0896-430f-4019-8fe6-8677cd0e9289": {
"ai_languageModel": [
[
{
"node": "f7e31525-1ac9-4adc-b4c6-9159e1b96e6e",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Weaviate Vector Store": {
"main": [
[
{
"node": "Aggregate Uploaded arXiv IDs",
"type": "main",
"index": 0
}
]
]
},
"75d64ce1-57c6-4b0f-80e4-66dbe8488397": {
"ai_languageModel": [
[
{
"node": "69af1ed4-0a73-4179-aff2-d78253e7921a",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"5df76f3f-0a71-4999-a85f-375c680fe073": {
"ai_languageModel": [
[
{
"node": "901c7f80-f856-4d90-8046-8b6fe2e45056",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"6f37e9bb-e5c0-493a-905d-87b55b874f49": {
"main": [
[
{
"node": "Remove Duplicates",
"type": "main",
"index": 0
}
]
]
},
"Weaviate Vector Store1": {
"ai_tool": [
[
{
"node": "f7e31525-1ac9-4adc-b4c6-9159e1b96e6e",
"type": "ai_tool",
"index": 0
}
]
]
},
"c2040ff2-808d-4f6e-8682-06f2aad3cf8b": {
"ai_outputParser": [
[
{
"node": "f7e31525-1ac9-4adc-b4c6-9159e1b96e6e",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"901c7f80-f856-4d90-8046-8b6fe2e45056": {
"ai_outputParser": [
[
{
"node": "69af1ed4-0a73-4179-aff2-d78253e7921a",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"Aggregate Uploaded arXiv IDs": {
"main": [
[
{
"node": "92511b85-b11d-4fd7-9985-326051db472a",
"type": "main",
"index": 0
}
]
]
},
"f7e31525-1ac9-4adc-b4c6-9159e1b96e6e": {
"main": [
[
{
"node": "bb101516-818c-48e5-87d8-1a9f98d0f3a2",
"type": "main",
"index": 0
}
]
]
},
"d62c633e-d6d2-4d0e-9645-a909ae4a0efa": {
"ai_textSplitter": [
[
{
"node": "04f15082-cb14-46b2-8e92-e2926e7de128",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"69af1ed4-0a73-4179-aff2-d78253e7921a": {
"main": [
[
{
"node": "Merge",
"type": "main",
"index": 1
}
]
]
}
}
}
Comment utiliser ce workflow ?
Copiez le code de configuration JSON ci-dessus, créez un nouveau workflow dans votre instance n8n et sélectionnez "Importer depuis le JSON", collez la configuration et modifiez les paramètres d'authentification selon vos besoins.
Dans quels scénarios ce workflow est-il adapté ?
Avancé - Création de contenu, RAG IA
Est-ce payant ?
Ce workflow est entièrement gratuit et peut être utilisé directement. Veuillez noter que les services tiers utilisés dans le workflow (comme l'API OpenAI) peuvent nécessiter un paiement de votre part.
Workflows recommandés
Mary Newhauser
@maryn
Partager ce workflow