Google DriveからPDFデータを抽出して書式を整える

Name: Google DriveからPDFデータを抽出して書式を整える
Rating: 4.5 (10 reviews)
Author: EoCi - Mr.Eo

中級

これはコンテンツ作成・マルチモーダルAI分野の自動化ワークフローで、15個のノードを含みます。主にSet、Code、GoogleDrive、ManualTrigger、ExtractFromFileなどのノードを使用し、Google DriveからPDFデータを抽出してフォーマットを整えます。

前提条件

•Google Drive API認証情報

使用ノード (15)

カテゴリー

コンテンツ作成

マルチモーダルAI

ワークフロープレビュー

ノード接続関係を可視化、ズームとパンをサポート

完了！

開始

PDFデータのみ取得

PDFファイル/ファイルを取得

取得ファイル/ファイルをダウンロード

ファイル/ファイルのデータを抽出

データパーサー＆クリーナー

React Flow

ワークフローをエクスポート

以下のJSON設定をn8nにインポートして、このワークフローを使用できます

{
  "meta": {
    "instanceId": "cd9bb7894b11bab249a60976239056d06e4831b51d7348f6790a85241c21fc56",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "4e195179-a7df-4daa-a734-4ddb75242d02",
      "name": "完了！",
      "type": "n8n-nodes-base.noOp",
      "position": [
        688,
        -32
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "2c1bacd1-864c-4da9-a3c8-fc6646a1935a",
      "name": "開始",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -480,
        0
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "d3a06fc0-6f82-4d6a-8cda-6694432830d8",
      "name": "PDFデータのみ取得",
      "type": "n8n-nodes-base.set",
      "position": [
        288,
        0
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "ccd95b23-ca0d-4e0a-a2af-c0e4fc9aae4e",
              "name": "text",
              "type": "string",
              "value": "={{ $json.text }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "2e7a429c-13ae-4ea9-80c5-5b482489e78b",
      "name": "PDFファイル/ファイルを取得",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -304,
        0
      ],
      "parameters": {
        "filter": {
          "folderId": {
            "__rl": true,
            "mode": "list",
            "value": ""
          },
          "whatToSearch": "files"
        },
        "options": {
          "fields": [
            "id",
            "name"
          ]
        },
        "resource": "fileFolder",
        "returnAll": true,
        "queryString": "*.pdf"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "TB3MDL9X1SLIEPS5",
          "name": "Template"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "0ce127fc-8604-492b-96b5-8fff0ed1f6f6",
      "name": "取得ファイル/ファイルをダウンロード",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -112,
        0
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $json.id }}"
        },
        "options": {
          "googleFileConversion": {
            "conversion": {
              "docsToFormat": "text/plain"
            }
          }
        },
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "TB3MDL9X1SLIEPS5",
          "name": "Template"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "0e761f9a-2d40-4787-8751-73e280beb452",
      "name": "ファイル/ファイルのデータを抽出",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        80,
        0
      ],
      "parameters": {
        "options": {},
        "operation": "pdf"
      },
      "typeVersion": 1
    },
    {
      "id": "398f6a89-2792-4e50-9da4-9444455cc2ae",
      "name": "データパーサー＆クリーナー",
      "type": "n8n-nodes-base.code",
      "position": [
        480,
        0
      ],
      "parameters": {
        "jsCode": "/**\n * Replaces every line break in a string with a single space.\n *\n * @param {string} text Raw text extracted from a PDF.\n * @returns {string} The text with line breaks replaced by spaces, or \"\" when the input is not a string.\n */\nfunction removeNewlines(text) {\n  if (typeof text !== 'string') {\n    // Non-string input (for example a missing text field) is reported and mapped to an empty string.\n    console.error(\"Input must be a string.\");\n    return \"\";\n  }\n  // Match CRLF as well as LF so Windows line endings do not leave stray carriage returns.\n  return text.replace(/\\r?\\n/g, ' ');\n}\n\n// In the Code node's default mode (run once for all items) every incoming PDF must be cleaned,\n// not only the first one. Each result is wrapped in { json } as n8n expects for returned items.\nreturn $input.all().map((item) => ({\n  json: { cleanedText: removeNewlines(item.json.text) },\n}));\n"
      },
      "typeVersion": 2
    },
    {
      "id": "91f0e401-6ac0-496d-b99f-9c5056105f74",
      "name": "付箋2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        656,
        128
      ],
      "parameters": {
        "width": 560,
        "height": 256,
        "content": "## 🙏 **A Big Thank You For Trying This Workflow**\nYour time and trust mean a lot. I truly appreciate you giving this workflow a try.\n\nFeedback is the key to making this project better and more effective. If you have a moment, I'd love to hear your:\n- Suggestions for improvement.\n- Ideas for new features.\n- Requests for other automation workflows.\n\n### Thank you for being part of this journey!"
      },
      "typeVersion": 1
    },
    {
      "id": "5464e441-7f31-4a24-9fa1-afc18dd664a6",
      "name": "付箋3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        656,
        416
      ],
      "parameters": {
        "width": 560,
        "height": 448,
        "content": "## 🔍 **TROUBLESHOOTING**\nRunning into issues? Here are some common fixes.\n- **Common Issues:**\n  - **\"Workflow finds no files\":**\n    1. Double-check that the Folder in the Google Drive node is correct.\n    2. Ensure your n8n Google credential has permission to view files in that folder.\n    3. Verify the files actually have a .pdf extension.\n\n- **\"Code node throws an error\":**\n  - Open the Code node and check the browser's developer console for JavaScript syntax errors. Make sure the input path to your text (items[0].json.text), matches what the Extract From File node is providing.\n\n- **Debug Checklist:**\n[ ] Are your Google Drive credentials valid? Try reconnecting them.\n[ ] Did you select the correct folder in the first Google Drive node?\n[ ] Does the output of the Extract From File node show the text you expect?\n[ ] Is the Code node correctly referencing the input data from the previous node?"
      },
      "typeVersion": 1
    },
    {
      "id": "85ea9ac7-1668-4990-9f06-8a11f39013a2",
      "name": "付箋4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -512,
        176
      ],
      "parameters": {
        "width": 1136,
        "height": 688,
        "content": "## 🛠️ **STEP-BY-STEP SETUP GUIDE**\nFollow these steps to get your workflow running in under 5 minutes.\n---\n---\n---\n---\n---\n---"
      },
      "typeVersion": 1
    },
    {
      "id": "0ccb06b5-ca65-49cf-945d-309fccb6d4a1",
      "name": "付箋5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        64,
        288
      ],
      "parameters": {
        "width": 544,
        "height": 560,
        "content": "\n### **4. Configure the Download Node (Download Retrieval File)** 📥\nThis node takes the file IDs found in the previous step and downloads the files.\n- **Operation:** Ensure this is set to Download.\n- **File ID:** This field should already be set using an expression {{ $json.id }}. This dynamically pulls the ID of each file found in the search step. You can leave this as is.\n---\n---\n---\n\n### **5. Configure the Code Node (Data Parser & Cleaner)** ✨\nThis is where you define your custom cleaning rules.\n- Open the Code node to view the JavaScript editor.\n- The raw text from the PDF will be available as input (e.g., items[0].json.text).\n- Modify the JavaScript code to perform your desired cleaning. This could be as simple as trimming whitespace or as complex as using regular expressions to find specific data.\n---\n---\n---\n\n### **6. Test Your Workflow!** ✅\nNow let's see it in action.\n1. Click Execute workflow at the top of the canvas.\n2. The workflow will run, and each node should get a green checkmark.\n3. Click on the final node (Done !) and check its Output to see the clean, extracted text."
      },
      "typeVersion": 1
    },
    {
      "id": "56d9b5fe-5ae6-4d9b-b298-64e4884a5939",
      "name": "付箋6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -496,
        288
      ],
      "parameters": {
        "width": 544,
        "height": 560,
        "content": "### **1. Prepare Your Google Drive** 📂\nBefore you begin, make sure you have a dedicated folder in your Google Drive where you will place the PDFs you want to process.\n- In Google Drive, create a new folder (e.g., \"PDFs for n8n\").\n- Upload one or more PDF files into this folder to use for testing.\n---\n---\n---\n\n### **2. Connect Your Google Drive Credential** 🔗\nYou only need to connect your Google account once.\n- In the n8n canvas, click on the first Google Drive node (Get PDF Files/File).\n- In the \"Credential\" field, click \"Create New\", then fill in \"Client ID\" and \"Client Secret\".\n- After that, click the \"Sign In\" button. A window will pop up asking you to sign in with your Google account and grant n8n permission.\n- Once completed, select this same credential for the second Google Drive node (Download Retrieval Files/File).\n---\n---\n---\n\n### **3. Configure the Search Node (Get PDF Files/File)** 🔎\n- This node tells the workflow where to look for your files.\n- **Operation:** Ensure this is set to Search.\n- **Search Query:** Type *.pdf to find all files with a PDF extension.\n- Click **\"Add Filter\"** and select Folder.\n- In the new filter, set the operation to In folder and use the list to select the Google Drive folder you created in Step 1.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "d85b90d7-2f7d-41f4-8c94-35b5a4c72487",
      "name": "付箋7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1104,
        48
      ],
      "parameters": {
        "width": 560,
        "height": 192,
        "content": "## 🔧 **CUSTOMIZATION OPTIONS**\nMake this workflow your own! Here are a few ideas to get you started:\n- 💾 **Data Fields:** Modify the \"Get PDF Data Only\" node to get more data fields such as \"Number of Pages\", \"metadata\", \"info\".\n\n- ✨ **Parser & Cleaner Rules:** Modify the code of \"Data Parser & Cleaner\" node to get your desired output (formatted result)."
      },
      "typeVersion": 1
    },
    {
      "id": "ab5b72ca-c58d-499f-b770-90fe19086dfc",
      "name": "付箋8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1104,
        272
      ],
      "parameters": {
        "width": 560,
        "height": 592,
        "content": "## 📋 **WORKFLOW FLOW EXPLAINED**\nThis workflow follows a simple, powerful, four-stage process to turn files into data.\n\n### **1. INPUT STAGE (File Discovery)**\n- The **\"Google Drive\"** node acts as the entry point, searching for files in the specific folder you defined.\n- It's configured to find all files ending in `.pdf` to ensure only the correct documents are processed.\n\n### **2. RETRIEVAL STAGE (File Download)**\n- The workflow loops over every file found in the previous stage.\n- A second Google Drive node downloads each file, preparing it for data extraction.\n\n### **3. PROCESSING STAGE (Data Extraction)**\n- The **\"Extract From File\"** node takes the binary data of the downloaded PDF.\n- It reads the document and pulls out all the raw, unstructured text from its pages.\n\n### **4. FORMATTING STAGE (Data Parsing & Cleaning)**\n- The raw text is passed to the Code node.\n- This is where the magic happens! A custom JavaScript script cleans the text by removing unwanted lines, fixing spacing, or even restructuring it into a clean JSON format. The output is ready for use."
      },
      "typeVersion": 1
    },
    {
      "id": "6d0970ab-067d-4975-842c-398fda000f40",
      "name": "付箋9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -448,
        -400
      ],
      "parameters": {
        "width": 960,
        "height": 352,
        "content": "## 📁 **Extract and Clean PDF Data from Google Drive**\n### ⚡️**Quick Demo**\n- **Input:** \"A Google Drive folder containing multiple PDF files, like invoices or reports.\"\n- **Output:** \"Clean, extracted text from each PDF, formatted by a custom script into a structured object ready for the next step.\"\n\n### ✅**What You Get**\n- **Automated File Discovery:** Automatically finds and loops through all .pdf files in a specific folder.\n- **Custom Cleaning Engine:** A dedicated Code node gives you full control to clean, parse, and structure the extracted text using JavaScript.\n- **On-Demand Execution:** A manual trigger lets you run the entire process with a single click whenever you need it.\n\n### 🎯**Perfect For**\n- Archiving the contents of articles, documents, reports, etc.\n- Anyone who often works with pdf files."
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "2c1bacd1-864c-4da9-a3c8-fc6646a1935a": {
      "main": [
        [
          {
            "node": "2e7a429c-13ae-4ea9-80c5-5b482489e78b",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "d3a06fc0-6f82-4d6a-8cda-6694432830d8": {
      "main": [
        [
          {
            "node": "398f6a89-2792-4e50-9da4-9444455cc2ae",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "2e7a429c-13ae-4ea9-80c5-5b482489e78b": {
      "main": [
        [
          {
            "node": "0ce127fc-8604-492b-96b5-8fff0ed1f6f6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "398f6a89-2792-4e50-9da4-9444455cc2ae": {
      "main": [
        [
          {
            "node": "4e195179-a7df-4daa-a734-4ddb75242d02",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "0e761f9a-2d40-4787-8751-73e280beb452": {
      "main": [
        [
          {
            "node": "d3a06fc0-6f82-4d6a-8cda-6694432830d8",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "0ce127fc-8604-492b-96b5-8fff0ed1f6f6": {
      "main": [
        [
          {
            "node": "0e761f9a-2d40-4787-8751-73e280beb452",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

よくある質問

このワークフローの使い方は？

上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択、設定を貼り付けて認証情報を必要に応じて変更してください。

このワークフローはどんな場面に適していますか？

中級者向けのワークフローで、コンテンツ作成やマルチモーダルAIの用途に適しています。

有料ですか？

このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス（Google Drive APIなど）は別途料金が発生する場合があります。

Google DriveからPDFデータを抽出して書式を整える

使用ノード (15)

カテゴリー

このワークフローの使い方は？

このワークフローはどんな場面に適していますか？

有料ですか？

関連ワークフロー

カテゴリー