Bright Data、Gemini、Pinecone を使用して LLM 向けに AI 対応のベクトルデータセットを作成

上級

これは Building Blocks・AI 分野の自動化ワークフローで、21個のノードを含みます。主に Set、HttpRequest、ManualTrigger、Agent、ChainLlm などのノードを使用し、AI技術を活用したスマート自動化を実現します。Bright Data、Gemini、Pinecone を使用して LLM 向けに AI 対応のベクトルデータセットを作成します。

前提条件
  • Bright Data API の認証情報(HTTP ヘッダー認証)
  • Google Gemini API Key
  • Pinecone API Key
ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートして、このワークフローを使用できます
{
  "id": "3Lih0LVosR8dZbla",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
    "templateCredsSetupCompleted": true
  },
  "name": "Create AI-Ready Vector Datasets for LLMs with Bright Data, Gemini & Pinecone",
  "tags": [
    {
      "id": "Kujft2FOjmOVQAmJ",
      "name": "Engineering",
      "createdAt": "2025-04-09T01:31:00.558Z",
      "updatedAt": "2025-04-09T01:31:00.558Z"
    },
    {
      "id": "ZOwtAMLepQaGW76t",
      "name": "Building Blocks",
      "createdAt": "2025-04-13T15:23:40.462Z",
      "updatedAt": "2025-04-13T15:23:40.462Z"
    },
    {
      "id": "ddPkw7Hg5dZhQu2w",
      "name": "AI",
      "createdAt": "2025-04-13T05:38:08.053Z",
      "updatedAt": "2025-04-13T05:38:08.053Z"
    }
  ],
  "nodes": [
    {
      "id": "0a468953-e348-420e-a6b3-c55fb20d3cbf",
      "name": "「Test workflow」クリック時",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        200,
        -710
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "3725e480-246f-4f32-b0a7-b946cacbe830",
      "name": "AIエージェント",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        1236,
        -60
      ],
      "parameters": {
        "text": "=Format the below search result\n\n{{ $json.output.search_result }}",
        "options": {},
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.8
    },
    {
      "id": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
      "name": "Pinecone ベクトルストア",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        1628,
        -10
      ],
      "parameters": {
        "mode": "insert",
        "options": {},
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": "hacker-news",
          "cachedResultName": "hacker-news"
        }
      },
      "credentials": {
        "pineconeApi": {
          "id": "wdfRQ6NE8yjCDFhY",
          "name": "PineconeApi account"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5",
      "name": "Google Gemini 埋め込み",
      "type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
      "position": [
        1612,
        210
      ],
      "parameters": {
        "modelName": "models/text-embedding-004"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "e6443541-de71-4d26-ad58-d7c72868a190",
      "name": "デフォルトデータローダー",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        1760,
        220
      ],
      "parameters": {
        "options": {},
        "jsonData": "={{ $('Information Extractor with Data Formatter').item.json.output.search_result }}",
        "jsonMode": "expressionData"
      },
      "typeVersion": 1
    },
    {
      "id": "09ffc8cd-096f-47fe-937d-f8ab4fb41266",
      "name": "再帰的文字テキスト分割器",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "position": [
        1820,
        410
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "90cc9aa4-0931-4c52-8734-e4e0de820205",
      "name": "Google Gemini チャットモデル1",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1240,
        160
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "1090a4af-7e5d-446b-a537-3afe48cd4909",
      "name": "Google Gemini チャットモデル2",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        948,
        -340
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "324c530c-0a03-411e-acb0-d82e9dc635cf",
      "name": "Google Gemini チャットモデル",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        948,
        160
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "3226a2d6-ade1-4d6a-95c5-0be4d787a947",
      "name": "構造化出力パーサー",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        1400,
        160
      ],
      "parameters": {
        "jsonSchemaExample": "[{\n\t\"id\": \"<string>\",\n\t\"title\": \"<string>\",\n    \"summary\": \"<string>\",\n    \"keywords\": [\"\"],\n    \"topics\": [\"\"]\n}]"
      },
      "typeVersion": 1.2
    },
    {
      "id": "a739a314-900a-4ef7-9cc2-1b65374e2e05",
      "name": "付箋ノート",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        40,
        -360
      ],
      "parameters": {
        "width": 480,
        "height": 220,
        "content": "## Note\nPlease make sure to set the URL for web crawling. \n\nWeb-Unlocker Product is being utilized for performing the web scrapping. \n\nThis workflow is utilizing the Basic LLM Chain, Information Extraction with the AI Agents for formatting, extracting and persisting the response in PineCone Vector Database"
      },
      "typeVersion": 1
    },
    {
      "id": "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c",
      "name": "フィールド設定 - URLおよびWebhook URL",
      "type": "n8n-nodes-base.set",
      "notes": "Set the URL which you are interested to scrap the data",
      "position": [
        420,
        -710
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "1c132dd6-31e4-453b-a8cf-cad9845fe55b",
              "name": "url",
              "type": "string",
              "value": "https://news.ycombinator.com?product=unlocker&method=api"
            },
            {
              "id": "90f3272b-d13d-44e2-8b4c-0943648cfce9",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/bc804ce5-4a45-4177-a68a-99c80e5c86e6"
            }
          ]
        }
      },
      "notesInFlow": true,
      "typeVersion": 3.4
    },
    {
      "id": "216a3261-a398-484c-9bf4-ca5966b829b6",
      "name": "ウェブリクエストをMake",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        640,
        -260
      ],
      "parameters": {
        "url": "https://api.brightdata.com/request",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "zone",
              "value": "web_unlocker1"
            },
            {
              "name": "url",
              "value": "={{ $json.url }}"
            },
            {
              "name": "format",
              "value": "raw"
            }
          ]
        },
        "genericAuthType": "httpHeaderAuth",
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "0c74e21c-3007-4297-b6ab-8ee17f4c6436",
      "name": "構造化JSONデータフォーマッター",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        860,
        -560
      ],
      "parameters": {
        "text": "=Format the below response and produce a textual data. Output the response as per the below JSON schema.\n\nHere's the input: {{ $json.data }}\nHere's the JSON schema: \n\n[{\n    \"rank\": { \"type\": \"integer\" },\n    \"title\": { \"type\": \"string\" },\n    \"site\": { \"type\": \"string\" },\n    \"points\": { \"type\": \"integer\" },\n    \"user\": { \"type\": \"string\" },\n    \"age\": { \"type\": \"string\" },\n    \"comments\": { \"type\": \"string\" }\n}]",
        "messages": {
          "messageValues": [
            {
              "message": "You are an expert data formatter"
            }
          ]
        },
        "promptType": "define"
      },
      "typeVersion": 1.6
    },
    {
      "id": "012d4bb0-2b58-47cd-9cea-b4e0dced9082",
      "name": "構造化データ用Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1314,
        -860
      ],
      "parameters": {
        "url": "={{ $json.webhook_url }}",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.text }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe",
      "name": "構造化AIエージェント応答用Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1750,
        -660
      ],
      "parameters": {
        "url": "={{ $json.webhook_url }}",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "251b4251-255c-48c6-999b-02227fa2de9b",
      "name": "付箋ノート1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        800,
        -620
      ],
      "parameters": {
        "width": 360,
        "height": 420,
        "content": "## AI Data Formatter\n"
      },
      "typeVersion": 1
    },
    {
      "id": "f62463cd-6be3-4942-a636-de980a3154b4",
      "name": "付箋ノート2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1560,
        -160
      ],
      "parameters": {
        "color": 4,
        "width": 520,
        "height": 720,
        "content": "## Vector Database Persistence\n"
      },
      "typeVersion": 1
    },
    {
      "id": "ad20cc91-766a-4a57-be54-6f0d09a784eb",
      "name": "付箋ノート3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1260,
        -920
      ],
      "parameters": {
        "color": 3,
        "width": 680,
        "height": 440,
        "content": "## Webhook Notification Handler\n"
      },
      "typeVersion": 1
    },
    {
      "id": "37ab5c0f-d36e-4131-844d-20a22d3f2861",
      "name": "データフォーマッター付き情報抽出器",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        860,
        -60
      ],
      "parameters": {
        "text": "={{ $json.data }}",
        "options": {
          "systemPromptTemplate": "You are an expert HTML extractor. Your job is to analyze the search result and extract the content as a collection on items"
        },
        "attributes": {
          "attributes": [
            {
              "name": "search_result",
              "description": "Search Response"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "e04e189a-8ba9-4ef4-9a49-fc13daf00828",
      "name": "付箋ノート4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        800,
        -160
      ],
      "parameters": {
        "color": 5,
        "width": 720,
        "height": 720,
        "content": "## Data Extraction/Formatting with the AI Agent\n"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "799fb406-600d-45a5-b926-24b8844f33a5",
  "connections": {
    "3725e480-246f-4f32-b0a7-b946cacbe830": {
      "main": [
        [
          {
            "node": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
            "type": "main",
            "index": 0
          },
          {
            "node": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "216a3261-a398-484c-9bf4-ca5966b829b6": {
      "main": [
        [
          {
            "node": "0c74e21c-3007-4297-b6ab-8ee17f4c6436",
            "type": "main",
            "index": 0
          },
          {
            "node": "37ab5c0f-d36e-4131-844d-20a22d3f2861",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "e6443541-de71-4d26-ad58-d7c72868a190": {
      "ai_document": [
        [
          {
            "node": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "30a12b8e-02f5-4b2e-bf9f-20fd9658405e": {
      "ai_tool": [
        []
      ]
    },
    "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5": {
      "ai_embedding": [
        [
          {
            "node": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "324c530c-0a03-411e-acb0-d82e9dc635cf": {
      "ai_languageModel": [
        [
          {
            "node": "37ab5c0f-d36e-4131-844d-20a22d3f2861",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "3226a2d6-ade1-4d6a-95c5-0be4d787a947": {
      "ai_outputParser": [
        [
          {
            "node": "3725e480-246f-4f32-b0a7-b946cacbe830",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "90cc9aa4-0931-4c52-8734-e4e0de820205": {
      "ai_languageModel": [
        [
          {
            "node": "3725e480-246f-4f32-b0a7-b946cacbe830",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "1090a4af-7e5d-446b-a537-3afe48cd4909": {
      "ai_languageModel": [
        [
          {
            "node": "0c74e21c-3007-4297-b6ab-8ee17f4c6436",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "0c74e21c-3007-4297-b6ab-8ee17f4c6436": {
      "main": [
        [
          {
            "node": "012d4bb0-2b58-47cd-9cea-b4e0dced9082",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c": {
      "main": [
        [
          {
            "node": "216a3261-a398-484c-9bf4-ca5966b829b6",
            "type": "main",
            "index": 0
          },
          {
            "node": "012d4bb0-2b58-47cd-9cea-b4e0dced9082",
            "type": "main",
            "index": 0
          },
          {
            "node": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "09ffc8cd-096f-47fe-937d-f8ab4fb41266": {
      "ai_textSplitter": [
        [
          {
            "node": "e6443541-de71-4d26-ad58-d7c72868a190",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "0a468953-e348-420e-a6b3-c55fb20d3cbf": {
      "main": [
        [
          {
            "node": "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "37ab5c0f-d36e-4131-844d-20a22d3f2861": {
      "main": [
        [
          {
            "node": "3725e480-246f-4f32-b0a7-b946cacbe830",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
よくある質問

このワークフローの使い方は?

上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択、設定を貼り付けて認証情報を必要に応じて変更してください。

このワークフローはどんな場面に適していますか?

上級者向けのワークフローで、ビルディングブロックおよび人工知能(AI)カテゴリーの自動化に適しています。

有料ですか?

このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス(Bright Data、Google Gemini API、Pineconeなど)は別途料金が発生する場合があります。

ワークフロー情報
難易度
上級
ノード数: 21
カテゴリー: 2
ノードタイプ: 13
難易度説明

上級者向け、16ノード以上の複雑なワークフロー

外部リンク
n8n.ioで表示

このワークフローを共有

カテゴリー

カテゴリー: 34