Research Paper Scraper to Google Sheets
Intermediate
This is an automation workflow in the AI category with 12 nodes. It mainly uses the Set, Code, Html, HttpRequest, and GoogleSheets nodes, and it automates research paper collection from Google Scholar with Bright Data and n8n.
Prerequisites
- A Bright Data API token (sent as a Bearer token by the HTTP Request node)
- Google Sheets OAuth2 credentials
Category: AI
Export Workflow
Copy the JSON configuration below into n8n to import and use this workflow.
{
"id": "giq3zqaP4QbY6LgC",
"meta": {
"instanceId": "60046904b104f0f72b2629a9d88fe9f676be4035769f1f08dad1dd38a76b9480"
},
"name": "Research_Paper_Scraper_to_Google_Sheets",
"tags": [],
"nodes": [
{
"id": "7d81edf3-6f00-4634-b79f-dbda3f9958e5",
"name": "Start Scraping (Manual Trigger)",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1080,
580
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6e172db5-7483-4079-bf8a-785602526bdc",
"name": "Set Research topic",
"type": "n8n-nodes-base.set",
"position": [
-860,
580
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "b530a847-0bb2-4039-9ad0-cbc9cc4d909e",
"name": "Topic",
"type": "string",
"value": "machine+learning"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "e65d092a-6854-478c-b33e-2fc309f71ae8",
"name": "Send Request to Bright Data API",
"type": "n8n-nodes-base.httpRequest",
"position": [
-600,
580
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "n8n_unblocker"
},
{
"name": "url",
"value": "=https://scholar.google.com/scholar?q={{ $json.Topic }}"
},
{
"name": "country",
"value": "us"
},
{
"name": "format",
"value": "raw"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "Bearer 40127ac3c2b4861572c8ad4c6d2273a0ce0472cb3ea7d3ac85a74a34629067aa"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "211bae33-32c5-44e8-b306-a5e0d520a4a0",
"name": "Extract Data from HTML (Title, Author, etc.)",
"type": "n8n-nodes-base.html",
"position": [
-400,
580
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "Title",
"cssSelector": "h3.gs_rt, a.gs_rt",
"returnArray": true
},
{
"key": "Author",
"cssSelector": "div.gs_a",
"returnArray": true
},
{
"key": "Abstract",
"cssSelector": "div.gs_rs",
"returnArray": true
},
{
"key": "PDF Link\t",
"cssSelector": "a[href*='pdf']",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "9ab7ba20-8614-46c5-b57a-3749d6ae04c4",
"name": "Clean & Structure Extracted Data",
"type": "n8n-nodes-base.code",
"position": [
-200,
580
],
"parameters": {
"jsCode": "const titles = items[0].json.Title || [];\nconst authors = items[0].json.Author || [];\nconst abstracts = items[0].json.Abstract || [];\nconst pdfLinks = items[0].json[\"PDF Link\\t\"] || [];\n\nconst output = [];\n\nfor (let i = 0; i < titles.length; i++) {\n // Clean title (remove tags like [PDF][B])\n let title = titles[i].replace(/\\[.*?\\]/g, '').trim();\n\n // Clean author (remove any trailing dashes or HTML leftovers)\n let author = authors[i] ? authors[i].replace(/\\s*-\\s*.*/, '').trim() : '';\n\n // Abstract fallback\n let abstract = abstracts[i] || '';\n\n // Get PDF link — from either a single object or array of duplicates\n let linkObj = pdfLinks[i];\n let pdfLink = '';\n\n if (Array.isArray(linkObj)) {\n // If multiple objects per item\n pdfLink = linkObj.find(obj => obj.href)?.href || '';\n } else if (linkObj?.href) {\n pdfLink = linkObj.href;\n }\n\n // Push cleaned object\n output.push({\n json: {\n title,\n author,\n abstract,\n pdfLink\n }\n });\n}\n\nreturn output;\n"
},
"typeVersion": 2
},
{
"id": "a246f20c-2bb9-4319-8812-e296c87a7df0",
"name": "Save Results to Google Sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
120,
580
],
"parameters": {
"columns": {
"value": {
"Topic": "={{ $('Set Research topic').item.json.Topic }}",
"title": "={{ $json.title }}",
"author": "={{ $json.author }}",
"abstract": "={{ $json.abstract }}",
"pdf link": "={{ $json.pdfLink }}"
},
"schema": [
{
"id": "Topic",
"type": "string",
"display": true,
"required": false,
"displayName": "Topic",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "title",
"type": "string",
"display": true,
"required": false,
"displayName": "title",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "author",
"type": "string",
"display": true,
"required": false,
"displayName": "author",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "abstract",
"type": "string",
"display": true,
"required": false,
"displayName": "abstract",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "pdf link",
"type": "string",
"display": true,
"required": false,
"displayName": "pdf link",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit?usp=drivesdk",
"cachedResultName": "Research papers from Google Scholar"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "r2mDaisH6e9VkwHl",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "1b4a1504-4a4a-4a0d-892b-d0c3e205ed85",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1140,
60
],
"parameters": {
"color": 5,
"width": 420,
"height": 720,
"content": "## 🔹 **Section 1: User Input & Trigger**\n\n**🧩 Nodes: Start Scraping | Set Topic**\n📍 **Purpose:** Let users easily input the topic they want to scrape — no need to deal with complex URLs.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| ⚡ Trigger | **Start Scraping (Manual)** | This node starts the workflow when you click “Execute Workflow.” It's the entry point. |\n| ✏️ Set | **Set Topic (Manual Input)** | Instead of requiring a URL, the user will enter a topic (like \"machine learning\" or \"digital marketing\"). This topic will be used to automatically generate the URL behind the scenes. |\n\n### 🧠 How it helps:\n\n* Great for beginners: Just type the topic, hit run.\n* Keeps the interface clean and user-friendly.\n* Avoids confusion around URLs and formats.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "bc56f528-6d18-4e05-942f-c06bb6e10b27",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-660,
80
],
"parameters": {
"color": 6,
"width": 600,
"height": 700,
"content": "## 🔸 **Section 2: Scrape & Parse Website**\n\n**🧩 Nodes: Send Request | Extract HTML | Clean Data**\n📍 **Purpose:** Uses the Bright Data proxy to access the webpage, extract raw HTML content, and clean it up into a readable format (title, author, abstract, etc.).\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| 🌐 HTTP Request | **Send Topic Request to Bright Data** | This sends a request to the Bright Data API using the topic you set earlier. It uses Bright Data’s network to safely load the actual website and return HTML content. |\n| 🧱 HTML Extract | **Extract Data from Webpage** | Parses the returned HTML to find relevant data like titles, authors, abstracts, and links. |\n| 🔣 Code | **Clean and Format Scraped Data** | A custom code block that organizes the messy data into neat records. For example: title → column A, abstract → column B, etc. |\n\n### 🧠 How it helps:\n\n* Makes web scraping safe and reliable by using proxies.\n* Converts unreadable HTML into structured information.\n* Beginner-friendly: No need to write a parser yourself.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "2c54e5e6-011a-4562-98ac-9cc9834bc284",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
0
],
"parameters": {
"color": 3,
"width": 340,
"height": 780,
"content": "## 🟢 **Section 3: Save to Google Sheets**\n\n**🧩 Node: Append to Google Sheets**\n📍 **Purpose:** Automatically sends the clean data into a Google Sheet for easy access, filtering, or sharing.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| ---------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |\n| 📄 Google Sheets | **Store Scraped Data in Spreadsheet** | Takes the structured output and appends it to the connected Google Sheet. Each result gets a row with title, author, abstract, etc. |\n\n### 🧠 How it helps:\n\n* No manual copy-pasting ever again!\n* Shareable and searchable format.\n* Updates automatically as you scrape more topics.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "4ce90703-961e-4070-9356-c9dffc23a6c5",
"name": "Sticky Note9",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
80
],
"parameters": {
"color": 4,
"width": 1300,
"height": 320,
"content": "=======================================\n WORKFLOW ASSISTANCE\n=======================================\nFor any questions or support, please contact:\n Yaron@nofluff.online\n\nExplore more tips and tutorials here:\n - YouTube: https://www.youtube.com/@YaronBeen/videos\n - LinkedIn: https://www.linkedin.com/in/yaronbeen/\n=======================================\n"
},
"typeVersion": 1
},
{
"id": "069ddb89-f7a1-4c4b-b65d-212be3252750",
"name": "Sticky Note4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
420
],
"parameters": {
"color": 4,
"width": 1289,
"height": 1878,
"content": "## 🌟 Research Paper Scraper to Google Sheets\n\n**Automate extraction of data from any website based on a topic — no coding needed!**\n\n---\n\n## 🔹 **Section 1: User Input & Trigger**\n\n**🧩 Nodes: Start Scraping | Set Topic**\n📍 **Purpose:** Let users easily input the topic they want to scrape — no need to deal with complex URLs.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| ⚡ Trigger | **Start Scraping (Manual)** | This node starts the workflow when you click “Execute Workflow.” It's the entry point. |\n| ✏️ Set | **Set Topic (Manual Input)** | Instead of requiring a URL, the user will enter a topic (like \"machine learning\" or \"digital marketing\"). This topic will be used to automatically generate the URL behind the scenes. |\n\n### 🧠 How it helps:\n\n* Great for beginners: Just type the topic, hit run.\n* Keeps the interface clean and user-friendly.\n* Avoids confusion around URLs and formats.\n\n---\n\n## 🔸 **Section 2: Scrape & Parse Website**\n\n**🧩 Nodes: Send Request | Extract HTML | Clean Data**\n📍 **Purpose:** Uses the Bright Data proxy to access the webpage, extract raw HTML content, and clean it up into a readable format (title, author, abstract, etc.).\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| 🌐 HTTP Request | **Send Topic Request to Bright Data** | This sends a request to the Bright Data API using the topic you set earlier. It uses Bright Data’s network to safely load the actual website and return HTML content. |\n| 🧱 HTML Extract | **Extract Data from Webpage** | Parses the returned HTML to find relevant data like titles, authors, abstracts, and links. |\n| 🔣 Code | **Clean and Format Scraped Data** | A custom code block that organizes the messy data into neat records. For example: title → column A, abstract → column B, etc. |\n\n### 🧠 How it helps:\n\n* Makes web scraping safe and reliable by using proxies.\n* Converts unreadable HTML into structured information.\n* Beginner-friendly: No need to write a parser yourself.\n\n---\n\n## 🟢 **Section 3: Save to Google Sheets**\n\n**🧩 Node: Append to Google Sheets**\n📍 **Purpose:** Automatically sends the clean data into a Google Sheet for easy access, filtering, or sharing.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| ---------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |\n| 📄 Google Sheets | **Store Scraped Data in Spreadsheet** | Takes the structured output and appends it to the connected Google Sheet. Each result gets a row with title, author, abstract, etc. |\n\n### 🧠 How it helps:\n\n* No manual copy-pasting ever again!\n* Shareable and searchable format.\n* Updates automatically as you scrape more topics.\n\n---\n\n## ✅ What a Beginner Gains from This Workflow\n\n| 💡 Feature | 🚀 Benefit |\n| --------------------------- | --------------------------------------------------------------------------------- |\n| Topic-based input | You don’t need to find or understand complex URLs. 
Just type “AI” or “marketing.” |\n| Fully automated scraping | You don’t need to open browsers or inspect elements. |\n| Ready-to-use Google Sheet | The final data is clean and saved into a sheet you can use anywhere. |\n| Beautiful, modular workflow | Each step is visual, editable, and reusable without coding skills. |\n\n---\n\n## 🎯 Final Result:\n\nYou type a **topic** → Bright Data scrapes the web → It extracts content → Cleans it → Saves it into **Google Sheets**.\nEverything happens automatically. **No code. No hassle. Just data.**\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "a1a5e609-756a-4757-a026-1349cf388e61",
"name": "Sticky Note5",
"type": "n8n-nodes-base.stickyNote",
"position": [
400,
0
],
"parameters": {
"color": 7,
"width": 380,
"height": 240,
"content": "## I’ll receive a tiny commission if you join Bright Data through this link—thanks for fueling more free content!\n\n### https://get.brightdata.com/1tndi4600b25"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "f931202a-3c22-495d-b775-71665bdf6c27",
"connections": {
"6e172db5-7483-4079-bf8a-785602526bdc": {
"main": [
[
{
"node": "e65d092a-6854-478c-b33e-2fc309f71ae8",
"type": "main",
"index": 0
}
]
]
},
"e65d092a-6854-478c-b33e-2fc309f71ae8": {
"main": [
[
{
"node": "211bae33-32c5-44e8-b306-a5e0d520a4a0",
"type": "main",
"index": 0
}
]
]
},
"7d81edf3-6f00-4634-b79f-dbda3f9958e5": {
"main": [
[
{
"node": "6e172db5-7483-4079-bf8a-785602526bdc",
"type": "main",
"index": 0
}
]
]
},
"9ab7ba20-8614-46c5-b57a-3749d6ae04c4": {
"main": [
[
{
"node": "a246f20c-2bb9-4319-8812-e296c87a7df0",
"type": "main",
"index": 0
}
]
]
},
"211bae33-32c5-44e8-b306-a5e0d520a4a0": {
"main": [
[
{
"node": "9ab7ba20-8614-46c5-b57a-3749d6ae04c4",
"type": "main",
"index": 0
}
]
]
}
}
}
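A note on the configuration above: the Set node stores the topic as `machine+learning`, with a hand-written `+` separator, and the HTTP Request node interpolates it into the Scholar URL. The standalone sketch below (Node.js 18+ for built-in `fetch`, run as an ES module) reproduces that request with `encodeURIComponent`, which handles spaces and special characters for any topic. The `BRIGHTDATA_TOKEN` environment variable is a placeholder of mine; the zone name `n8n_unblocker` comes from the workflow. Inside n8n, the expression `{{ encodeURIComponent($json.Topic) }}` should achieve the same thing.

```javascript
// Standalone sketch of the request made by the "Send Request to Bright Data
// API" node. Assumes Node.js 18+ and a BRIGHTDATA_TOKEN environment variable
// (a placeholder name); the zone "n8n_unblocker" is taken from the JSON above.
const topic = 'machine learning';

const response = await fetch('https://api.brightdata.com/request', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.BRIGHTDATA_TOKEN}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    zone: 'n8n_unblocker',
    // encodeURIComponent handles spaces and special characters, so the topic
    // does not need manual "+" separators as in the Set node's example value
    url: `https://scholar.google.com/scholar?q=${encodeURIComponent(topic)}`,
    country: 'us',
    format: 'raw', // return the raw HTML of the results page
  }),
});

const html = await response.text();
console.log(html.slice(0, 500)); // preview the returned markup
```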
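One more caveat: the HTML node returns `Title`, `Author`, `Abstract`, and `PDF Link` as four independent, index-aligned arrays, so a result that lacks an abstract or a PDF link can shift the fields of every later row out of alignment. A minimal alternative sketch, assuming the `cheerio` package is available (a standalone script, or self-hosted n8n with `NODE_FUNCTION_ALLOW_EXTERNAL=cheerio`), scopes the same selectors to each result container so each paper's fields stay together. The `div.gs_r` container selector is my assumption about Scholar's markup, as is `data` as the field name holding the HTTP node's raw response.

```javascript
// Sketch: parse each Scholar result as a unit instead of four parallel arrays.
// Assumes cheerio is installed; div.gs_r as the per-result container is an
// assumption, alongside the gs_rt/gs_a/gs_rs selectors the HTML node uses.
const cheerio = require('cheerio');

function parseScholarHtml(html) {
  const $ = cheerio.load(html);
  const papers = [];
  $('div.gs_r').each((_, el) => {
    const result = $(el);
    const title = result.find('.gs_rt').text().replace(/\[.*?\]/g, '').trim();
    if (!title) return; // skip containers that are not real results
    papers.push({
      title,
      author: result.find('.gs_a').text().trim(),
      abstract: result.find('.gs_rs').text().trim(),
      // first link whose href mentions "pdf"; empty string if none
      pdfLink: result.find("a[href*='pdf']").first().attr('href') || '',
    });
  });
  return papers;
}

// In an n8n Code node, the raw HTML field name depends on the HTTP Request
// node's response settings; "data" is a common default:
// return parseScholarHtml(items[0].json.data).map(paper => ({ json: paper }));
```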
FAQ

How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, choose "Import from JSON", paste the configuration, then adjust the credential settings as needed.
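If you would rather script the import, here is a minimal sketch using n8n's public REST API, assuming the API is enabled on your instance and you have created an API key under Settings → n8n API. `N8N_URL`, `N8N_API_KEY`, and the file name `research_paper_scraper.json` are placeholders.

```javascript
// Minimal sketch: import the workflow JSON through n8n's public REST API.
// Assumes Node.js 18+ (ES module); N8N_URL and N8N_API_KEY are placeholders.
import { readFile } from 'node:fs/promises';

const exported = JSON.parse(
  await readFile('research_paper_scraper.json', 'utf8')
);

const res = await fetch(`${process.env.N8N_URL}/api/v1/workflows`, {
  method: 'POST',
  headers: {
    'X-N8N-API-KEY': process.env.N8N_API_KEY,
    'Content-Type': 'application/json',
  },
  // The create endpoint expects name, nodes, connections, and settings;
  // export-only fields (id, meta, versionId, tags, active, pinData) are omitted.
  body: JSON.stringify({
    name: exported.name,
    nodes: exported.nodes,
    connections: exported.connections,
    settings: exported.settings,
  }),
});

console.log(res.status, await res.json());
```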
What scenarios is this workflow suited for?
Intermediate level, in the AI category: it suits anyone who wants to collect research paper metadata (title, author, abstract, PDF link) from Google Scholar into a spreadsheet.
Do I need to pay?
The workflow itself is completely free and can be imported directly. Note, however, that third-party services it relies on (such as the Bright Data API) may charge for usage.
Related Workflows

- Automated Forum Monitoring via Bright Data: forum monitoring automation with Bright Data and n8n (Set, Code, Html, and more; 17 nodes; Yaron Been; AI)
- Scrape Upcoming Events with Bright Data: automated event discovery with Bright Data and n8n (Code, Html, Http Request, and more; 11 nodes; Yaron Been; AI)
- AI YouTube Analysis Assistant: comment analyzer and insight report generator (If, Set, Code, and more; 19 nodes; Yaron Been; AI)
- Automating Hyper-Personalized Outreach at Scale with Bright Data and LLMs (If, Set, Wait, and more; 21 nodes; Yaron Been; Sales)
- Automated Social Media Caption Publisher via Bright Data: social media captions automated with Bright Data and n8n (Set, Html, Twitter, and more; 16 nodes; Yaron Been; AI)
- Competitor Price Monitoring via Bright Data: automated competitor price monitoring with Bright Data and n8n (If, Code, Html, and more; 15 nodes; Yaron Been; AI)
Workflow Info

Difficulty: Intermediate
Nodes: 12
Categories: 1
Node types: 7

Author
Yaron Been (@yaron-nofluff)
Building AI Agents and Automations | Growth Marketer | Entrepreneur | Book Author & Podcast Host. If you need any help with automations, feel free to reach out via LinkedIn: https://www.linkedin.com/in/yaronbeen/ and check out the YouTube channel: https://www.youtube.com/@YaronBeen/videos
External Links
View on n8n.io →