サイトマップからベクトルストアへ:効率的なRAGワークフローの作成
上級
これはContent Creation, Multimodal AI分野の自動化ワークフローで、40個のノードを含みます。主にIf, Set, Xml, Code, Waitなどのノードを使用します。サイトマップからベクトルストアへ:効率的なRAGワークフローの作成
前提条件
- PostgreSQLデータベース接続情報
- Supabase URL と API Key
- ターゲットAPIの認証情報が必要な場合あり
- OpenAI API Key
使用ノード (40)
ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートして、このワークフローを使用できます
{
"meta": {
"instanceId": "0862f70dc42e115052f6a2d4c2b6537665b4361a614cec7cd17d1c45c8868621",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "ab180eb3-c086-4f9f-b9d0-f3f56056a416",
"name": "「Test workflow」クリック時",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-6816,
-304
],
"parameters": {},
"typeVersion": 1
},
{
"id": "20e77374-c3ce-457f-945c-d6f6dc928de1",
"name": "HTTP リクエスト",
"type": "n8n-nodes-base.httpRequest",
"position": [
-6624,
-304
],
"parameters": {
"url": "https://www.kiekens.com/sitemap.xml",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "b23dd724-1bd7-4eef-9e22-8bef987b2128",
"name": "XML",
"type": "n8n-nodes-base.xml",
"position": [
-6432,
-304
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "4715b380-f386-4926-892e-2c133a1155c1",
"name": "分割出力",
"type": "n8n-nodes-base.splitOut",
"position": [
-6224,
-304
],
"parameters": {
"options": {},
"fieldToSplitOut": "urlset.url"
},
"typeVersion": 1
},
{
"id": "56181432-63f2-4d93-be6d-6f1489e04ca9",
"name": "アイテムループ処理",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-5152,
-592
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "f52b3e19-7d64-4f3d-848d-81cf2b65bb15",
"name": "待機",
"type": "n8n-nodes-base.wait",
"position": [
-4192,
-608
],
"webhookId": "9af87c5e-b07f-48dc-9ca8-61b471a24cad",
"parameters": {
"amount": 30
},
"typeVersion": 1.1
},
{
"id": "961143cf-c387-4e2d-a477-0988c0b0f512",
"name": "条件分岐",
"type": "n8n-nodes-base.if",
"position": [
-3728,
-608
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "9d90c1ce-590e-40a5-ae8c-d92326032975",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.status }}",
"rightValue": "completed"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "991580c5-10ed-4bab-811e-2ec50d4050fd",
"name": "デフォルトデータローダー",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
-2384,
-496
],
"parameters": {
"options": {
"metadata": {
"metadataValues": [
{
"name": "page",
"value": "={{ $json.result.url }}"
}
]
}
},
"jsonData": "={{ $json.cleanedText }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "0fc79f0d-8ebd-4d61-ac29-7ba65284af52",
"name": "文字テキスト分割器",
"type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter",
"position": [
-2368,
-352
],
"parameters": {
"chunkSize": 5000
},
"typeVersion": 1
},
{
"id": "bc5aac68-bb66-4c9c-abd7-9a913b0a56fa",
"name": "Embeddings OpenAI",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
-2528,
-464
],
"parameters": {
"model": "text-embedding-ada-002",
"options": {}
},
"credentials": {
"openAiApi": {
"id": "OwpPpcltPaXyVklS",
"name": "OpenAi_Mariela.b.d."
}
},
"typeVersion": 1.1
},
{
"id": "e3b525eb-7a3f-456d-a476-b013293c85e0",
"name": "フィールド編集",
"type": "n8n-nodes-base.set",
"position": [
-4064,
-288
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "f2bcdb54-e1fe-4670-99aa-6eec973bf5f1",
"name": "task_id",
"type": "string",
"value": "={{ $('Crawl4ai Webページスクレイピング').item.json.task_id }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "bdbed5ea-d1a1-4922-a7b7-759466709fcb",
"name": "Crawl4AI_タスクステータス",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-3968,
-608
],
"parameters": {
"url": "=https://crawl4ai-app-nrcsv.ondigitalocean.app/task/{{ $json.task_id }}",
"options": {
"timeout": 5000
},
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth"
},
"credentials": {
"httpHeaderAuth": {
"id": "De808MMiUFOFLbNm",
"name": "Crawl4ai_marinextai"
}
},
"retryOnFail": true,
"typeVersion": 4.2,
"waitBetweenTries": 5000
},
{
"id": "f0da6b36-885a-4e86-b044-f3b490bf3829",
"name": "アイテムループ処理1",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-5824,
144
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "f78a39bd-183c-4985-b1b1-f3142dfe31f3",
"name": "条件分岐2",
"type": "n8n-nodes-base.if",
"position": [
-4736,
-592
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "or",
"conditions": [
{
"id": "fbc89427-990b-45d0-8538-e403c1b18ddd",
"operator": {
"type": "string",
"operation": "contains"
},
"leftValue": "={{ $json.status }}",
"rightValue": "pending"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "b6dfe888-4e2e-4c74-8a66-c3db28604514",
"name": "分割出力1",
"type": "n8n-nodes-base.splitOut",
"position": [
-5392,
-384
],
"parameters": {
"include": "selectedOtherFields",
"options": {},
"fieldToSplitOut": "url",
"fieldsToInclude": "status"
},
"typeVersion": 1
},
{
"id": "78f05cb5-8b9c-4f51-b252-4ca2195b52ad",
"name": "URLフォーマット",
"type": "n8n-nodes-base.set",
"position": [
-5648,
160
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "9038a5b3-6985-4edc-bdd1-8dc5a3e8877c",
"name": "loc",
"type": "string",
"value": "={{ $json.loc.trim().toLowerCase() }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "805f1fea-841b-40aa-a055-de7ddbbb306f",
"name": "Supabase テーブル内URL確認",
"type": "n8n-nodes-base.supabase",
"onError": "continueErrorOutput",
"position": [
-5456,
160
],
"parameters": {
"filters": {
"conditions": [
{
"keyName": "url",
"keyValue": "={{ $json.loc }}",
"condition": "eq"
}
]
},
"tableId": "scrape_queue",
"operation": "getAll",
"returnAll": true
},
"credentials": {
"supabaseApi": {
"id": "CYPZsYCPJqrO9xBO",
"name": "Supabase_N8N AI Agent Assistant_marinextai"
}
},
"retryOnFail": true,
"typeVersion": 1,
"alwaysOutputData": true,
"waitBetweenTries": 5000
},
{
"id": "4f6e6ccb-7757-4e9f-b50c-9acb2fe99009",
"name": "Supabase ノード出力フォーマット",
"type": "n8n-nodes-base.code",
"position": [
-5184,
160
],
"parameters": {
"jsCode": "const supabaseResult = $json;\n\n// Get the clean URL from the Set node (URLフォーマット)\nconst originalLoc = $('URLフォーマット').item.json.loc;\nconst cleanUrl = typeof originalLoc === 'string' ? originalLoc.trim().toLowerCase() : '';\n\n// Check if URL already exists\n// Empty object {} means URL doesn't exist, so we should insert\nconst shouldInsert = Object.keys(supabaseResult).length === 0;\n\nreturn [\n {\n json: {\n url: cleanUrl,\n shouldInsert,\n }\n }\n];"
},
"typeVersion": 2
},
{
"id": "54ed36e4-e675-4bd2-a74e-aeadbe7f486c",
"name": "「shouldInsert」がtrueの場合",
"type": "n8n-nodes-base.if",
"position": [
-4992,
160
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "f3a00d98-73af-4d35-b4e5-5158c120753f",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $json.shouldInsert }}",
"rightValue": "true"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "483dc0c7-da52-423a-a3bb-cc9ef6d6f1df",
"name": "新規行URL",
"type": "n8n-nodes-base.supabase",
"position": [
-4752,
272
],
"parameters": {
"tableId": "scrape_queue",
"fieldsUi": {
"fieldValues": [
{
"fieldId": "url",
"fieldValue": "={{ $json.url }}"
}
]
}
},
"credentials": {
"supabaseApi": {
"id": "CYPZsYCPJqrO9xBO",
"name": "Supabase_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 1
},
{
"id": "632752e1-138e-481f-92ad-2ac14c245c45",
"name": "付箋1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-5888,
64
],
"parameters": {
"width": 1280,
"height": 500,
"content": "## Put all of the website's URLs into the Supabase table scrape_queue"
},
"typeVersion": 1
},
{
"id": "5fc57e6f-771c-4eaa-ba8e-8e233dc2a343",
"name": "Supabase 内 scrape_queue テーブル作成",
"type": "n8n-nodes-base.postgres",
"position": [
-6816,
-688
],
"parameters": {
"query": "CREATE TABLE scrape_queue (\n id uuid DEFAULT gen_random_uuid() PRIMARY KEY,\n url text NOT NULL UNIQUE,\n status text NOT NULL DEFAULT 'pending', -- 'pending', 'completed', 'error'\n task_id text,\n result text,\n created_at timestamp with time zone DEFAULT now(),\n updated_at timestamp with time zone DEFAULT now()\n);\n\n-- Optional: Auto-update updated_at on row change\nCREATE OR REPLACE FUNCTION update_updated_at_column()\nRETURNS TRIGGER AS $$\nBEGIN\n NEW.updated_at = now();\n RETURN NEW;\nEND;\n$$ language 'plpgsql';\n\nCREATE TRIGGER update_scrape_queue_updated_at\nBEFORE UPDATE ON scrape_queue\nFOR EACH ROW\nEXECUTE PROCEDURE update_updated_at_column();",
"options": {},
"operation": "executeQuery"
},
"credentials": {
"postgres": {
"id": "k1GeBv6AjFuwp2B1",
"name": "Postgres_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 2.6
},
{
"id": "8b2666b7-0eb6-42df-9ae2-e204516dd3d1",
"name": "Supabase 内 documents テーブル作成",
"type": "n8n-nodes-base.postgres",
"position": [
-6608,
-688
],
"parameters": {
"query": "CREATE TABLE documents (\n id SERIAL PRIMARY KEY,\n content TEXT,\n metadata JSONB,\n embedding VECTOR(1536) -- Adjust the dimension size based on your OpenAI model (e.g. ada-002 returns 1536)\n);",
"options": {},
"operation": "executeQuery"
},
"credentials": {
"postgres": {
"id": "k1GeBv6AjFuwp2B1",
"name": "Postgres_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 2.6
},
{
"id": "7c7b8f66-00f6-48db-af03-fba30dc5e6b1",
"name": "付箋2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-6848,
-768
],
"parameters": {
"color": 3,
"width": 500,
"height": 280,
"content": "## Execute Once"
},
"typeVersion": 1
},
{
"id": "82279582-c71b-43aa-8e60-6b8af7ce866c",
"name": "付箋",
"type": "n8n-nodes-base.stickyNote",
"position": [
-4992,
-736
],
"parameters": {
"color": 4,
"width": 460,
"height": 360,
"content": "## Get the URL from Supabase and check if it is completed or not\n\n**Only the NOT completed URLs will be passed**"
},
"typeVersion": 1
},
{
"id": "8b2245b2-cdc2-408a-879b-260335a10bcb",
"name": "付箋3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-4448,
-736
],
"parameters": {
"color": 5,
"width": 640,
"height": 360,
"content": "## Crawl4AI URL Scraping"
},
"typeVersion": 1
},
{
"id": "b42143d2-1e13-4031-996a-26af2dc26632",
"name": "Crawl4ai Webページスクレイピング",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-4384,
-608
],
"parameters": {
"url": "https://crawl4ai-app-nrcsv.ondigitalocean.app/crawl",
"method": "POST",
"options": {},
"sendBody": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "urls",
"value": "={{ $json.url }}"
},
{
"name": "priority",
"value": "10"
}
]
},
"genericAuthType": "httpHeaderAuth"
},
"credentials": {
"httpHeaderAuth": {
"id": "De808MMiUFOFLbNm",
"name": "Crawl4ai_marinextai"
}
},
"retryOnFail": true,
"typeVersion": 4.2,
"waitBetweenTries": 5000
},
{
"id": "6ac1fda6-8363-4cff-8810-7cb2ffa63b67",
"name": "スクレイピング冗長データ削除",
"type": "n8n-nodes-base.code",
"onError": "continueRegularOutput",
"position": [
-3488,
-768
],
"parameters": {
"jsCode": "return items.map(item => {\n // Handle both data structures\n const raw = item.json.result?.markdown || item.json.cleanedText || item.json.html || '';\n \n // Add a safety check for null/undefined\n if (!raw) {\n return {\n json: {\n url: item.json.result?.url || item.json.url || '',\n cleanedText: '',\n error: 'No content found to process'\n }\n };\n }\n \n let cleaned = raw\n // Remove headers but keep the content structure\n .replace(/^#{1,6}\\s+(.+)$/gm, '$1') // Convert headers to plain text\n \n // Remove markdown links but keep the text\n .replace(/\\[([^\\]]+)\\]\\([^)]+\\)/g, '$1') // Keep link text, remove URL\n \n // Remove code blocks completely\n .replace(/```[\\s\\S]*?```/g, '') \n .replace(/`([^`]+)`/g, '$1') // Remove inline code backticks but keep content\n \n // Remove markdown formatting\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1') // Remove bold formatting\n .replace(/\\*([^*]+)\\*/g, '$1') // Remove italic formatting\n .replace(/_{2,}([^_]+)_{2,}/g, '$1') // Remove underline formatting\n .replace(/~~([^~]+)~~/g, '$1') // Remove strikethrough\n \n // Remove lists formatting but keep content\n .replace(/^\\s*[-*+]\\s+/gm, '') // Remove bullet points\n .replace(/^\\s*\\d+\\.\\s+/gm, '') // Remove numbered lists\n \n // Remove HTML remnants\n .replace(/<[^>]*>/g, '') // Remove any remaining HTML tags\n .replace(/&[a-zA-Z0-9#]+;/g, '') // Remove HTML entities\n \n // Remove navigation and common web elements\n .replace(/\\b(Home|About|Contact|Privacy|Terms|Login|Register|Menu|Navigation|Footer|Header|Sidebar)\\b/gi, '')\n .replace(/\\b(Click here|Read more|Learn more|Show more|View all|See all)\\b/gi, '')\n .replace(/\\b(Previous|Next|Page \\d+|Back to top)\\b/gi, '')\n \n // Remove social media and sharing text\n .replace(/\\b(Share|Tweet|Facebook|LinkedIn|Instagram|Follow us|Subscribe)\\b/gi, '')\n \n // Remove common website noise\n .replace(/\\b(Cookie|Cookies|GDPR|Accept|Decline|Consent)\\b/gi, '')\n 
.replace(/\\b(Advertisement|Ad|Sponsored|Promotion)\\b/gi, '')\n \n // Remove excessive punctuation and symbols\n .replace(/[^\\w\\s.,!?;:()\\-\"']/g, '') // Keep only essential punctuation\n .replace(/\\.{2,}/g, '.') // Replace multiple dots with single dot\n .replace(/\\?{2,}/g, '?') // Replace multiple question marks\n .replace(/!{2,}/g, '!') // Replace multiple exclamation marks\n \n // Clean up whitespace and line breaks\n .replace(/\\n{3,}/g, '\\n\\n') // Replace multiple line breaks with double\n .replace(/\\s+/g, ' ') // Normalize whitespace\n .replace(/\\s*\\n\\s*/g, '\\n') // Clean line breaks\n \n // Remove lines that are too short (likely noise)\n .split('\\n')\n .filter(line => line.trim().length > 10) // Remove very short lines\n .join('\\n')\n \n .trim();\n \n // Additional quality checks\n const wordCount = cleaned.split(/\\s+/).length;\n const hasMinimumContent = wordCount >= 50; // Minimum 50 words\n \n // Check if content is mostly meaningful (not just numbers/symbols)\n const meaningfulContent = cleaned.replace(/[^\\w\\s]/g, '').length > cleaned.length * 0.7;\n \n // Extract additional metadata for better context\n const extractedTitle = raw.match(/^#{1,3}\\s+(.+)$/m)?.[1] || '';\n const domain = (item.json.result?.url || item.json.url || '').replace(/^https?:\\/\\//, '').split('/')[0];\n \n return {\n json: {\n url: item.json.result?.url || item.json.url || '',\n cleanedText: cleaned,\n wordCount: wordCount,\n hasMinimumContent: hasMinimumContent,\n meaningfulContent: meaningfulContent,\n extractedTitle: extractedTitle,\n domain: domain,\n contentLength: cleaned.length,\n // Quality score for filtering\n qualityScore: (hasMinimumContent ? 0.5 : 0) + (meaningfulContent ? 0.5 : 0)\n }\n };\n});"
},
"typeVersion": 2
},
{
"id": "520a512f-2da8-4cb7-b834-fe6fbfa2ad02",
"name": "Supabase ベクトルストア_ドキュメント",
"type": "@n8n/n8n-nodes-langchain.vectorStoreSupabase",
"position": [
-2544,
-672
],
"parameters": {
"mode": "insert",
"options": {
"queryName": "match_documents"
},
"tableName": {
"__rl": true,
"mode": "list",
"value": "documents",
"cachedResultName": "documents"
}
},
"credentials": {
"supabaseApi": {
"id": "CYPZsYCPJqrO9xBO",
"name": "Supabase_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 1
},
{
"id": "edb03374-1674-4070-b8a6-7afff6118f9a",
"name": "行取得 - scrape_queue テーブル",
"type": "n8n-nodes-base.supabase",
"position": [
-4912,
-592
],
"parameters": {
"filters": {
"conditions": [
{
"keyName": "url",
"keyValue": "={{ $json.url }}"
}
]
},
"tableId": "scrape_queue",
"operation": "get"
},
"credentials": {
"supabaseApi": {
"id": "CYPZsYCPJqrO9xBO",
"name": "Supabase_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 1
},
{
"id": "57358b66-0d48-4d53-a188-c5c550e46a9e",
"name": "行更新 - scrape_queue テーブル",
"type": "n8n-nodes-base.supabase",
"position": [
-2224,
-992
],
"parameters": {
"filters": {
"conditions": [
{
"keyName": "url",
"keyValue": "={{ $('行取得 - scrape_queue テーブル').item.json.url }}",
"condition": "eq"
}
]
},
"tableId": "scrape_queue",
"fieldsUi": {
"fieldValues": [
{
"fieldId": "status",
"fieldValue": "={{ $('Crawl4AI_タスクステータス').item.json.status }}"
},
{
"fieldId": "task_id",
"fieldValue": "={{ $('Crawl4ai Webページスクレイピング').item.json.task_id }}"
}
]
},
"operation": "update"
},
"credentials": {
"supabaseApi": {
"id": "CYPZsYCPJqrO9xBO",
"name": "Supabase_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 1
},
{
"id": "3291a358-282c-4cc2-a869-c9b4651e157e",
"name": "行更新 - scrape_queue テーブル1",
"type": "n8n-nodes-base.supabase",
"position": [
-3984,
-1072
],
"parameters": {
"filters": {
"conditions": [
{
"keyName": "url",
"keyValue": "={{ $('行取得 - scrape_queue テーブル').first().json.url }}",
"condition": "eq"
}
]
},
"tableId": "scrape_queue",
"fieldsUi": {
"fieldValues": [
{
"fieldId": "task_id",
"fieldValue": "={{ $json.task_id }}"
},
{
"fieldId": "status",
"fieldValue": "={{ $json.error.status }}"
}
]
},
"operation": "update"
},
"credentials": {
"supabaseApi": {
"id": "CYPZsYCPJqrO9xBO",
"name": "Supabase_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 1
},
{
"id": "f801de82-dbe9-44c1-a6c3-ac2847e93060",
"name": "待機1",
"type": "n8n-nodes-base.wait",
"position": [
-4352,
-208
],
"webhookId": "32f2ac99-68dc-4afc-8ebb-f64625cc96ef",
"parameters": {
"unit": "minutes"
},
"typeVersion": 1.1
},
{
"id": "10aecbd3-6fd8-420f-b997-34d68eecde54",
"name": "品質フィルターノード",
"type": "n8n-nodes-base.code",
"onError": "continueRegularOutput",
"position": [
-3264,
-768
],
"parameters": {
"jsCode": "// Filter out low-quality content\nreturn items.filter(item => {\n const quality = item.json.qualityScore || 0;\n const minWords = item.json.wordCount >= 50;\n const hasContent = item.json.cleanedText.length > 200;\n \n return quality >= 0.5 && minWords && hasContent;\n});"
},
"typeVersion": 2
},
{
"id": "9473c86c-7525-41f6-a2be-f7750d930317",
"name": "コンテンツタイプ検出",
"type": "n8n-nodes-base.code",
"onError": "continueRegularOutput",
"position": [
-3008,
-768
],
"parameters": {
"jsCode": "// Content Type Detection - Fixed Version\nreturn items.map(item => {\n const text = item.json.cleanedText || '';\n \n // Content type detection function\n const detectContentType = (text) => {\n if (!text || text.length < 10) {\n return 'unknown';\n }\n \n const lowerText = text.toLowerCase();\n \n // Check for code content\n if (lowerText.includes('function') || lowerText.includes('class') || \n lowerText.includes('import') || lowerText.includes('def ') ||\n lowerText.includes('var ') || lowerText.includes('const ')) {\n return 'code';\n }\n \n // Check for tutorial content\n if (lowerText.includes('step 1') || lowerText.includes('tutorial') || \n lowerText.includes('how to') || lowerText.includes('guide') ||\n lowerText.includes('walkthrough')) {\n return 'tutorial';\n }\n \n // Check for FAQ content\n if (lowerText.includes('faq') || lowerText.includes('q:') || \n lowerText.includes('a:') || lowerText.includes('question') ||\n lowerText.includes('frequently asked')) {\n return 'faq';\n }\n \n // Check for documentation\n if (lowerText.includes('api') || lowerText.includes('documentation') ||\n lowerText.includes('reference') || lowerText.includes('manual')) {\n return 'documentation';\n }\n \n // Check for news/blog content\n if (lowerText.includes('published') || lowerText.includes('author') ||\n lowerText.includes('posted') || lowerText.includes('blog')) {\n return 'blog';\n }\n \n // Check for product/service pages\n if (lowerText.includes('price') || lowerText.includes('buy') ||\n lowerText.includes('purchase') || lowerText.includes('product')) {\n return 'product';\n }\n \n // Default to article\n return 'article';\n };\n \n // Detect content type\n const contentType = detectContentType(text);\n \n // Return the item with added content type\n return {\n json: {\n ...item.json, // Keep all existing data\n contentType: contentType\n }\n };\n});"
},
"typeVersion": 2
},
{
"id": "54873bf5-ecb2-44e3-9dfb-e0e6ace02917",
"name": "拡張メタデータ抽出",
"type": "n8n-nodes-base.code",
"onError": "continueRegularOutput",
"position": [
-2784,
-768
],
"parameters": {
"jsCode": "// Enhanced metadata extraction - Fixed Version\nreturn items.map(item => {\n const cleaned = item.json.cleanedText || '';\n const url = item.json.url || '';\n const contentType = item.json.contentType || 'article';\n \n // Extract title from the cleaned text (look for first meaningful line)\n const extractTitle = (text) => {\n if (!text) return '';\n \n const lines = text.split('\\n').filter(line => line.trim().length > 0);\n if (lines.length === 0) return '';\n \n // Find the first substantial line (likely the title)\n const titleLine = lines.find(line => \n line.trim().length > 10 && \n line.trim().length < 200 &&\n !line.includes('http') &&\n !line.includes('www.')\n );\n \n return titleLine ? titleLine.trim() : lines[0].trim();\n };\n \n // Extract domain from URL\n const extractDomain = (url) => {\n if (!url) return '';\n try {\n return url.replace(/^https?:\\/\\//, '').split('/')[0];\n } catch (e) {\n return '';\n }\n };\n \n // Count words in the text\n const countWords = (text) => {\n if (!text) return 0;\n return text.trim().split(/\\s+/).filter(word => word.length > 0).length;\n };\n \n // Calculate quality score\n const calculateQualityScore = (text, wordCount) => {\n if (!text || wordCount < 50) return 0;\n \n const meaningfulContent = text.replace(/[^\\w\\s]/g, '').length > text.length * 0.7;\n const hasMinimumContent = wordCount >= 50;\n \n return (hasMinimumContent ? 0.5 : 0) + (meaningfulContent ? 
0.5 : 0);\n };\n \n // Simple language detection (basic version)\n const detectLanguage = (text) => {\n if (!text) return 'unknown';\n \n // Simple heuristic - could be improved with a proper language detection library\n const commonEnglishWords = ['the', 'and', 'is', 'in', 'to', 'of', 'a', 'for', 'on', 'with'];\n const commonDutchWords = ['de', 'het', 'en', 'van', 'een', 'in', 'op', 'te', 'aan', 'met'];\n \n const lowerText = text.toLowerCase();\n const englishCount = commonEnglishWords.filter(word => lowerText.includes(` ${word} `)).length;\n const dutchCount = commonDutchWords.filter(word => lowerText.includes(` ${word} `)).length;\n \n if (englishCount > dutchCount) return 'en';\n if (dutchCount > englishCount) return 'nl';\n return 'unknown';\n };\n \n // Extract all metadata\n const extractedTitle = extractTitle(cleaned);\n const domain = extractDomain(url);\n const wordCount = countWords(cleaned);\n const qualityScore = calculateQualityScore(cleaned, wordCount);\n const detectedLanguage = detectLanguage(cleaned);\n \n // Enhanced metadata object\n const metadata = {\n page: url,\n title: extractedTitle,\n domain: domain,\n contentType: contentType,\n wordCount: wordCount,\n scrapedDate: new Date().toISOString(),\n language: detectedLanguage,\n qualityScore: qualityScore,\n contentLength: cleaned.length\n };\n \n return {\n json: {\n ...item.json, // Keep all existing data\n metadata: metadata,\n // Also keep individual fields for easier access\n extractedTitle: extractedTitle,\n domain: domain,\n wordCount: wordCount,\n qualityScore: qualityScore,\n detectedLanguage: detectedLanguage\n }\n };\n});"
},
"typeVersion": 2
},
{
"id": "f2d3d6a3-b48e-4b08-bf8e-f8fff06d3494",
"name": "付箋4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3536,
-912
],
"parameters": {
"color": 6,
"width": 900,
"height": 340,
"content": "## Clean the HTML code"
},
"typeVersion": 1
},
{
"id": "6ddcf33d-84cb-4ee7-bf62-cb2747aff406",
"name": "条件分岐1",
"type": "n8n-nodes-base.if",
"position": [
-3632,
-288
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "3e84e5d8-e49c-4a7b-98c3-9e115f592c10",
"operator": {
"type": "string",
"operation": "exists",
"singleValue": true
},
"leftValue": "={{ $json.task_id }}",
"rightValue": ""
},
{
"id": "c6a0525f-3224-4ad5-8d0a-e0a7a27fb5d1",
"operator": {
"type": "number",
"operation": "gte"
},
"leftValue": "={{ $json.attempt_count }}",
"rightValue": 10
}
]
}
},
"typeVersion": 2.2
},
{
"id": "ffb7b9cb-a4fb-4db2-833c-331672de42bd",
"name": "行更新 - scrape_queue テーブル2",
"type": "n8n-nodes-base.supabase",
"position": [
-3376,
-176
],
"parameters": {
"filters": {
"conditions": [
{
"keyName": "url",
"keyValue": "={{ $('行取得 - scrape_queue テーブル').first().json.url }}",
"condition": "eq"
}
]
},
"tableId": "scrape_queue",
"fieldsUi": {
"fieldValues": [
{
"fieldId": "task_id",
"fieldValue": "={{ $json.task_id }}"
},
{
"fieldId": "status",
"fieldValue": "=error"
}
]
},
"operation": "update"
},
"credentials": {
"supabaseApi": {
"id": "CYPZsYCPJqrO9xBO",
"name": "Supabase_N8N AI Agent Assistant_marinextai"
}
},
"typeVersion": 1
},
{
"id": "44c7fe75-0e88-4114-b506-6e7850c2a038",
"name": "タスクIDカウンター",
"type": "n8n-nodes-base.code",
"position": [
-3856,
-288
],
"parameters": {
"jsCode": "// Simple counter that resets for each new task ID\nif (typeof globalThis.currentTaskId === 'undefined') {\n globalThis.currentTaskId = null;\n globalThis.currentCounter = 0;\n}\n\nreturn items.map(item => {\n const taskId = item.json.task_id;\n \n // Check if this is a new task ID\n if (globalThis.currentTaskId !== taskId) {\n // New task ID detected - reset counter\n globalThis.currentTaskId = taskId;\n globalThis.currentCounter = 1;\n } else {\n // Same task ID - increment counter\n globalThis.currentCounter++;\n }\n \n return {\n json: {\n ...item.json,\n attempt_count: globalThis.currentCounter\n }\n };\n});"
},
"typeVersion": 2
}
],
"pinData": {},
"connections": {
"961143cf-c387-4e2d-a477-0988c0b0f512": {
"main": [
[
{
"node": "6ac1fda6-8363-4cff-8810-7cb2ffa63b67",
"type": "main",
"index": 0
}
],
[
{
"node": "e3b525eb-7a3f-456d-a476-b013293c85e0",
"type": "main",
"index": 0
}
]
]
},
"6ddcf33d-84cb-4ee7-bf62-cb2747aff406": {
"main": [
[
{
"node": "ffb7b9cb-a4fb-4db2-833c-331672de42bd",
"type": "main",
"index": 0
}
],
[
{
"node": "f52b3e19-7d64-4f3d-848d-81cf2b65bb15",
"type": "main",
"index": 0
}
]
]
},
"f78a39bd-183c-4985-b1b1-f3142dfe31f3": {
"main": [
[
{
"node": "b42143d2-1e13-4031-996a-26af2dc26632",
"type": "main",
"index": 0
}
],
[
{
"node": "56181432-63f2-4d93-be6d-6f1489e04ca9",
"type": "main",
"index": 0
}
]
]
},
"b23dd724-1bd7-4eef-9e22-8bef987b2128": {
"main": [
[
{
"node": "4715b380-f386-4926-892e-2c133a1155c1",
"type": "main",
"index": 0
}
]
]
},
"f52b3e19-7d64-4f3d-848d-81cf2b65bb15": {
"main": [
[
{
"node": "bdbed5ea-d1a1-4922-a7b7-759466709fcb",
"type": "main",
"index": 0
}
]
]
},
"f801de82-dbe9-44c1-a6c3-ac2847e93060": {
"main": [
[
{
"node": "b42143d2-1e13-4031-996a-26af2dc26632",
"type": "main",
"index": 0
}
]
]
},
"4715b380-f386-4926-892e-2c133a1155c1": {
"main": [
[
{
"node": "f0da6b36-885a-4e86-b044-f3b490bf3829",
"type": "main",
"index": 0
}
]
]
},
"b6dfe888-4e2e-4c74-8a66-c3db28604514": {
"main": [
[
{
"node": "56181432-63f2-4d93-be6d-6f1489e04ca9",
"type": "main",
"index": 0
}
]
]
},
"e3b525eb-7a3f-456d-a476-b013293c85e0": {
"main": [
[
{
"node": "44c7fe75-0e88-4114-b506-6e7850c2a038",
"type": "main",
"index": 0
}
]
]
},
"20e77374-c3ce-457f-945c-d6f6dc928de1": {
"main": [
[
{
"node": "b23dd724-1bd7-4eef-9e22-8bef987b2128",
"type": "main",
"index": 0
}
]
]
},
"78f05cb5-8b9c-4f51-b252-4ca2195b52ad": {
"main": [
[
{
"node": "805f1fea-841b-40aa-a055-de7ddbbb306f",
"type": "main",
"index": 0
}
]
]
},
"56181432-63f2-4d93-be6d-6f1489e04ca9": {
"main": [
[],
[
{
"node": "edb03374-1674-4070-b8a6-7afff6118f9a",
"type": "main",
"index": 0
}
]
]
},
"44c7fe75-0e88-4114-b506-6e7850c2a038": {
"main": [
[
{
"node": "6ddcf33d-84cb-4ee7-bf62-cb2747aff406",
"type": "main",
"index": 0
}
]
]
},
"f0da6b36-885a-4e86-b044-f3b490bf3829": {
"main": [
[
{
"node": "b6dfe888-4e2e-4c74-8a66-c3db28604514",
"type": "main",
"index": 0
}
],
[
{
"node": "78f05cb5-8b9c-4f51-b252-4ca2195b52ad",
"type": "main",
"index": 0
}
]
]
},
"483dc0c7-da52-423a-a3bb-cc9ef6d6f1df": {
"main": [
[
{
"node": "f0da6b36-885a-4e86-b044-f3b490bf3829",
"type": "main",
"index": 0
}
]
]
},
"bc5aac68-bb66-4c9c-abd7-9a913b0a56fa": {
"ai_embedding": [
[
{
"node": "520a512f-2da8-4cb7-b834-fe6fbfa2ad02",
"type": "ai_embedding",
"index": 0
}
]
]
},
"991580c5-10ed-4bab-811e-2ec50d4050fd": {
"ai_document": [
[
{
"node": "520a512f-2da8-4cb7-b834-fe6fbfa2ad02",
"type": "ai_document",
"index": 0
}
]
]
},
"10aecbd3-6fd8-420f-b997-34d68eecde54": {
"main": [
[
{
"node": "9473c86c-7525-41f6-a2be-f7750d930317",
"type": "main",
"index": 0
}
]
]
},
"bdbed5ea-d1a1-4922-a7b7-759466709fcb": {
"main": [
[
{
"node": "961143cf-c387-4e2d-a477-0988c0b0f512",
"type": "main",
"index": 0
}
],
[
{
"node": "3291a358-282c-4cc2-a869-c9b4651e157e",
"type": "main",
"index": 0
}
]
]
},
"9473c86c-7525-41f6-a2be-f7750d930317": {
"main": [
[
{
"node": "54873bf5-ecb2-44e3-9dfb-e0e6ace02917",
"type": "main",
"index": 0
}
]
]
},
"0fc79f0d-8ebd-4d61-ac29-7ba65284af52": {
"ai_textSplitter": [
[
{
"node": "991580c5-10ed-4bab-811e-2ec50d4050fd",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"b42143d2-1e13-4031-996a-26af2dc26632": {
"main": [
[
{
"node": "f52b3e19-7d64-4f3d-848d-81cf2b65bb15",
"type": "main",
"index": 0
}
],
[
{
"node": "f801de82-dbe9-44c1-a6c3-ac2847e93060",
"type": "main",
"index": 0
}
]
]
},
"54ed36e4-e675-4bd2-a74e-aeadbe7f486c": {
"main": [
[
{
"node": "483dc0c7-da52-423a-a3bb-cc9ef6d6f1df",
"type": "main",
"index": 0
}
],
[
{
"node": "f0da6b36-885a-4e86-b044-f3b490bf3829",
"type": "main",
"index": 0
}
]
]
},
"54873bf5-ecb2-44e3-9dfb-e0e6ace02917": {
"main": [
[
{
"node": "520a512f-2da8-4cb7-b834-fe6fbfa2ad02",
"type": "main",
"index": 0
}
]
]
},
"edb03374-1674-4070-b8a6-7afff6118f9a": {
"main": [
[
{
"node": "f78a39bd-183c-4985-b1b1-f3142dfe31f3",
"type": "main",
"index": 0
}
]
]
},
"520a512f-2da8-4cb7-b834-fe6fbfa2ad02": {
"main": [
[
{
"node": "57358b66-0d48-4d53-a188-c5c550e46a9e",
"type": "main",
"index": 0
}
]
]
},
"ab180eb3-c086-4f9f-b9d0-f3f56056a416": {
"main": [
[
{
"node": "20e77374-c3ce-457f-945c-d6f6dc928de1",
"type": "main",
"index": 0
}
]
]
},
"57358b66-0d48-4d53-a188-c5c550e46a9e": {
"main": [
[
{
"node": "56181432-63f2-4d93-be6d-6f1489e04ca9",
"type": "main",
"index": 0
}
]
]
},
"3291a358-282c-4cc2-a869-c9b4651e157e": {
"main": [
[
{
"node": "56181432-63f2-4d93-be6d-6f1489e04ca9",
"type": "main",
"index": 0
}
]
]
},
"ffb7b9cb-a4fb-4db2-833c-331672de42bd": {
"main": [
[
{
"node": "56181432-63f2-4d93-be6d-6f1489e04ca9",
"type": "main",
"index": 0
}
]
]
},
"6ac1fda6-8363-4cff-8810-7cb2ffa63b67": {
"main": [
[
{
"node": "10aecbd3-6fd8-420f-b997-34d68eecde54",
"type": "main",
"index": 0
}
]
]
},
"4f6e6ccb-7757-4e9f-b50c-9acb2fe99009": {
"main": [
[
{
"node": "54ed36e4-e675-4bd2-a74e-aeadbe7f486c",
"type": "main",
"index": 0
}
]
]
},
"805f1fea-841b-40aa-a055-de7ddbbb306f": {
"main": [
[
{
"node": "4f6e6ccb-7757-4e9f-b50c-9acb2fe99009",
"type": "main",
"index": 0
}
]
]
}
}
}
よくある質問
このワークフローの使い方は?
上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択、設定を貼り付けて認証情報を必要に応じて変更してください。
このワークフローはどんな場面に適していますか?
上級 - コンテンツ作成, マルチモーダルAI
有料ですか?
このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス(OpenAI APIなど)は別途料金が発生する場合があります。
関連ワークフロー
ペットショップ 4
ペットショップ予約AIエージェント
If
Set
Code
+
If
Set
Code
187 ノードBruno Dias
人工知能
デリバリー ハンバーガーショップ MVP
🤖 レストランと配送の自動化を支援するAI駆動型WhatsAppアシスタント
If
Set
Code
+
If
Set
Code
152 ノードBruno Dias
コンテキスト・ハイブリッドRAG AIコピー
RAGアプリケーション向けのGoogle DriveからSupabaseコンテキストベクトルデータベースへの同期
If
Set
Code
+
If
Set
Code
76 ノードMichael Taleb
AI RAG検索拡張
コンテンツジェネレーター v3
AI駆動のブログ自動化:GPT-4でSEO記事を生成し、WordPressとTwitterに公開
If
Set
Code
+
If
Set
Code
144 ノードJay Emp0
コンテンツ作成
私のスマートエージェントアリーナコミュニティ競技会
Qdrant、Mistral OCR、GPT-4を使ったRAGベースのQ&Aシステムの構築
Set
Code
Wait
+
Set
Code
Wait
41 ノードDavide
コンテンツ作成
Google Drive、Gemini、Supabaseを活用した自動更新型RAGチャットボットの作成
Google Drive、Gemini、Supabaseを使用して、自動更新型のRAGチャットボットを作成
Set
Code
Merge
+
Set
Code
Merge
45 ノードAnirudh Aeran
コンテンツ作成
ワークフロー情報
難易度
上級
ノード数: 40
カテゴリー: 2
ノードタイプ: 16
作成者
Mariela Slavenova
@marielabg🚀 Fractional Head of AI Ops | COO | CTO | I diagnose, fix & ship automations that pay for themselves | The Harden Method™ - Discover→Design→Build→Break→Harden→Launch→Monitor | Founder @ MarinextAI
外部リンク
n8n.ioで表示 →
このワークフローを共有