Build a speech-to-text API with OpenAI GPT-4o-mini
Intermediate
This is an automation workflow for Document Extraction and Multimodal AI, containing 10 nodes. It mainly uses the Set, Webhook, HttpRequest, and RespondToWebhook nodes to create an API that transcribes audio to text with OpenAI GPT-4o-mini.
Prerequisites
- An HTTP webhook endpoint (created automatically by n8n)
- Credentials for the target API may be required (here, an OpenAI API key)
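Once the workflow is imported and active, any HTTP client can call the endpoint. The sketch below is a minimal Node.js (18+) client, assuming a hypothetical input file recording.webm and a placeholder URL that you must replace with the Production URL from the Webhook node; the audio_file field name and the Transcript response key come from the workflow itself.

// Minimal client sketch (Node.js 18+, which ships fetch/FormData/Blob).
import { readFile } from "node:fs/promises";

// Placeholder: replace with your own Production URL from the Webhook node.
const WEBHOOK_URL = "https://your-n8n-instance/webhook/audio-to-transcribe";

async function transcribe(path) {
  // The workflow expects the audio under the multipart field name "audio_file".
  const form = new FormData();
  form.append("audio_file", new Blob([await readFile(path)], { type: "audio/webm" }), "audio.webm");

  const res = await fetch(WEBHOOK_URL, { method: "POST", body: form });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);

  // The workflow responds with JSON containing a "Transcript" key.
  const { Transcript } = await res.json();
  return Transcript;
}

transcribe("recording.webm").then(console.log).catch(console.error);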
Workflow Preview
The preview displays the node connections visually.
Workflow Export
Copy the JSON configuration below and import it into n8n to use this workflow.
{
"nodes": [
{
"id": "6b7dd876-ed21-47f6-877b-d6c45f8bc9b3",
"name": "OpenAI로 변환",
"type": "n8n-nodes-base.httpRequest",
"position": [
560,
140
],
"parameters": {
"url": "https://api.openai.com/v1/audio/transcriptions",
"method": "POST",
"options": {},
"sendBody": true,
"contentType": "multipart-form-data",
"authentication": "predefinedCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "file",
"parameterType": "formBinaryData",
"inputDataFieldName": "audio_file"
},
{
"name": "model",
"value": "gpt-4o-mini-transcribe"
}
]
},
"nodeCredentialType": "openAiApi"
},
"credentials": {
"openAiApi": {
"id": "dMiSy27YCK6c6rra",
"name": "Duv's OpenAI"
}
},
"typeVersion": 4.2
},
{
"id": "26543502-9e91-4d70-af12-df78ac5ba630",
"name": "텍스트 추출",
"type": "n8n-nodes-base.set",
"position": [
840,
140
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "51b3d82e-6ef0-4b0b-86aa-33cf8203a24e",
"name": "Transcript",
"type": "string",
"value": "={{ $json.text }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "d12de568-f2b8-4757-b45f-f79bc579ee36",
"name": "변환할 오디오가 포함된 Webhook",
"type": "n8n-nodes-base.webhook",
"position": [
220,
140
],
"webhookId": "6a06f5e4-9105-4780-9840-9b7619a25647",
"parameters": {
"path": "audio-to-transcribe",
"options": {},
"httpMethod": "POST",
"responseMode": "responseNode"
},
"typeVersion": 2
},
{
"id": "e5de7e17-64a7-4466-a381-0dfb2e9d9711",
"name": "변환된 텍스트로 Webhook에 응답",
"type": "n8n-nodes-base.respondToWebhook",
"position": [
1100,
140
],
"parameters": {
"options": {
"responseCode": 200
}
},
"typeVersion": 1.3
},
{
"id": "883dbfbe-7330-41e6-bc7e-6dda8385250c",
"name": "메모지",
"type": "n8n-nodes-base.stickyNote",
"position": [
120,
700
],
"parameters": {
"color": 4,
"width": 580,
"height": 120,
"content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <title>Transcribe Your Audio</title>\n <script src=\"https://cdn.tailwindcss.com\"></script>\n <style>\n body {\n font-family: sans-serif;\n background-color: #f0f4f8;\n display: flex;\n justify-content: center;\n align-items: center;\n min-height: 100vh;\n margin: 0;\n }\n .container {\n background-color: #ffffff;\n border-radius: 0.75rem; /* Equivalent to rounded-xl */\n box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); /* Equivalent to shadow-md */\n padding: 2rem; /* Equivalent to p-8 */\n width: 100%;\n max-width: 28rem; /* Equivalent to max-w-sm */\n position: relative;\n }\n .loading-overlay {\n position: absolute;\n top: 0;\n left: 0;\n right: 0;\n bottom: 0;\n background: rgba(255, 255, 255, 0.8);\n backdrop-filter: blur(4px);\n display: flex;\n flex-direction: column;\n align-items: center;\n justify-content: center;\n border-radius: 0.75rem;\n z-index: 10;\n opacity: 0;\n visibility: hidden;\n transition: opacity 0.3s ease, visibility 0.3s ease;\n }\n .loading-overlay.visible {\n opacity: 1;\n visibility: visible;\n }\n .spinner {\n border: 4px solid rgba(0, 0, 0, 0.1);\n border-left-color: #000;\n border-radius: 50%;\n width: 32px;\n height: 32px;\n animation: spin 1s linear infinite;\n }\n @keyframes spin {\n to { transform: rotate(360deg); }\n }\n </style>\n</head>\n<body>\n <div class=\"container\">\n <h2 class=\"text-2xl font-bold text-center mb-6\">Transcribe Your Audio</h2>\n\n <!-- Audio Recording Section -->\n <div id=\"audio-section\" class=\"space-y-4\">\n <div class=\"flex flex-col items-center\">\n <button id=\"record-btn\" class=\"bg-blue-500 hover:bg-blue-600 text-white font-bold py-3 px-6 rounded-full text-lg mb-4 transition-colors\">\n Start Recording\n </button>\n <p id=\"recording-status\" class=\"text-gray-600 text-sm\">Press to record</p>\n <p id=\"timer\" class=\"text-xl font-mono text-gray-700 mt-2\">00:00</p>\n </div>\n\n <!-- Audio Playback -->\n <div id=\"audio-playback-container\" class=\"hidden flex flex-col items-center mt-4\">\n <audio id=\"audio-player\" controls class=\"w-full\"></audio>\n <div class=\"flex space-x-4 mt-4\">\n <button id=\"re-record-btn\" class=\"bg-gray-200 hover:bg-gray-300 text-gray-800 font-semibold py-2 px-4 rounded-md transition-colors\">\n Re-record\n </button>\n <button id=\"submit-audio-btn\" class=\"bg-green-500 hover:bg-green-600 text-white font-semibold py-2 px-4 rounded-md transition-colors\">\n Transcribe\n </button>\n </div>\n </div>\n </div>\n\n <!-- Transcript Display Section -->\n <div id=\"transcript-section\" class=\"hidden mt-6 space-y-4\">\n <label for=\"transcript-input\" class=\"block text-gray-700 font-semibold mb-1\">Your Transcript</label>\n <textarea id=\"transcript-input\" rows=\"6\" class=\"w-full p-3 border border-gray-300 rounded-md focus:ring-2 focus:ring-blue-500 focus:border-transparent resize-y\"></textarea>\n <button id=\"transcribe-another-btn\" class=\"w-full bg-blue-500 hover:bg-blue-600 text-white font-bold py-2 px-4 rounded-md transition-colors\">\n Transcribe Another Audio\n </button>\n </div>\n\n <!-- Loading Overlay -->\n <div id=\"loading-overlay\" class=\"loading-overlay\">\n <div class=\"spinner\"></div>\n <p id=\"loading-text\" class=\"mt-4 text-gray-700\">Processing...</p>\n </div>\n </div>\n\n <script>\n const recordBtn = document.getElementById('record-btn');\n const recordingStatus = 
document.getElementById('recording-status');\n const timerEl = document.getElementById('timer');\n const audioPlaybackContainer = document.getElementById('audio-playback-container');\n const audioPlayer = document.getElementById('audio-player');\n const reRecordBtn = document.getElementById('re-record-btn');\n const submitAudioBtn = document.getElementById('submit-audio-btn');\n const transcriptSection = document.getElementById('transcript-section');\n const transcriptInput = document.getElementById('transcript-input');\n const transcribeAnotherBtn = document.getElementById('transcribe-another-btn'); // Re-added\n const loadingOverlay = document.getElementById('loading-overlay');\n const loadingText = document.getElementById('loading-text');\n\n let mediaRecorder;\n let audioChunks = [];\n let recordedAudioBlob = null;\n let timerInterval;\n let seconds = 0;\n let isRecording = false;\n\n const WEBHOOK_URL = 'YOUR WEBHOOK URL';\n\n // --- Section Management ---\n const sections = {\n 'audio-section': document.getElementById('audio-section'),\n 'transcript-section': document.getElementById('transcript-section')\n };\n\n const showSection = (sectionId) => {\n for (const id in sections) {\n if (sections.hasOwnProperty(id)) {\n if (id === sectionId) {\n sections[id].classList.remove('hidden');\n } else {\n sections[id].classList.add('hidden');\n }\n }\n }\n };\n\n // --- Utility Functions ---\n const formatTime = (time) => {\n const minutes = Math.floor(time / 60).toString().padStart(2, '0');\n const seconds = Math.floor(time % 60).toString().padStart(2, '0');\n return `${minutes}:${seconds}`;\n };\n\n const showLoading = (message) => {\n loadingText.textContent = message;\n loadingOverlay.classList.add('visible');\n };\n\n const hideLoading = () => {\n loadingOverlay.classList.remove('visible');\n };\n\n const updateTimer = () => {\n seconds++;\n timerEl.textContent = formatTime(seconds);\n };\n\n const resetRecordingState = () => {\n isRecording = false;\n clearInterval(timerInterval);\n seconds = 0;\n timerEl.textContent = '00:00';\n recordBtn.textContent = 'Start Recording';\n recordBtn.classList.remove('bg-red-500', 'hover:bg-red-600');\n recordBtn.classList.add('bg-blue-500', 'hover:bg-blue-600');\n recordingStatus.textContent = 'Press to record';\n audioPlaybackContainer.classList.add('hidden');\n recordBtn.style.display = 'block';\n transcriptInput.value = '';\n };\n\n // --- Event Handlers ---\n recordBtn.addEventListener('click', async () => {\n if (isRecording) {\n mediaRecorder.stop();\n } else if (recordBtn.textContent === 'Record New Audio') {\n resetRecordingState();\n showSection('audio-section');\n } else {\n try {\n const stream = await navigator.mediaDevices.getUserMedia({ audio: true });\n mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });\n audioChunks = [];\n\n mediaRecorder.ondataavailable = (event) => {\n audioChunks.push(event.data);\n };\n\n mediaRecorder.onstop = () => {\n recordedAudioBlob = new Blob(audioChunks, { type: 'audio/webm' });\n const audioURL = URL.createObjectURL(recordedAudioBlob);\n audioPlayer.src = audioURL;\n stream.getTracks().forEach(track => track.stop());\n\n recordBtn.style.display = 'none';\n recordingStatus.textContent = 'Recording finished.';\n audioPlaybackContainer.classList.remove('hidden');\n clearInterval(timerInterval);\n };\n\n mediaRecorder.start();\n isRecording = true;\n recordBtn.textContent = 'Stop Recording';\n recordBtn.classList.remove('bg-blue-500', 'hover:bg-blue-600');\n 
recordBtn.classList.add('bg-red-500', 'hover:bg-red-600');\n recordingStatus.textContent = 'Recording...';\n seconds = 0;\n timerEl.textContent = '00:00';\n timerInterval = setInterval(updateTimer, 1000);\n } catch (error) {\n console.error('Error accessing microphone:', error);\n alert('Could not access microphone. Please allow access.'); // Using alert for simplicity as per previous instructions\n recordingStatus.textContent = 'Error: Microphone access denied.';\n }\n }\n });\n\n reRecordBtn.addEventListener('click', () => {\n resetRecordingState();\n showSection('audio-section');\n });\n\n submitAudioBtn.addEventListener('click', async () => {\n if (!recordedAudioBlob) {\n alert('Please record audio first.');\n return;\n }\n\n showLoading('Transcribing audio...');\n\n const formData = new FormData();\n formData.append('audio_file', recordedAudioBlob, 'audio.webm');\n\n try {\n const response = await fetch(WEBHOOK_URL, {\n method: 'POST',\n body: formData,\n });\n\n if (!response.ok) {\n throw new Error(`HTTP error! status: ${response.status}`);\n }\n\n const result = await response.json();\n console.log(\"Webhook response:\", result);\n\n transcriptInput.value = result.Transcript || 'No transcript received.';\n showSection('transcript-section');\n \n audioPlaybackContainer.classList.add('hidden');\n \n recordBtn.style.display = 'block'; \n recordBtn.textContent = 'Record New Audio'; \n recordBtn.classList.remove('bg-red-500', 'hover:bg-red-600');\n recordBtn.classList.add('bg-blue-500', 'hover:bg-blue-600');\n recordingStatus.textContent = 'Audio transcribed!';\n timerEl.textContent = '00:00';\n \n } catch (error) {\n console.error('Error sending audio to webhook:', error);\n alert(`Failed to transcribe audio: ${error.message}`);\n resetRecordingState();\n showSection('audio-section');\n } finally {\n hideLoading();\n }\n });\n\n // Event listener for the new \"Transcribe Another Audio\" button\n transcribeAnotherBtn.addEventListener('click', () => {\n resetRecordingState(); // Reset recording state\n showSection('audio-section'); // Go back to the audio recording section\n });\n\n // Initial setup when the page loads\n document.addEventListener('DOMContentLoaded', () => {\n showSection('audio-section');\n resetRecordingState(); // Ensure initial state is clean\n });\n </script>\n</body>\n</html>\n"
},
"typeVersion": 1
},
{
"id": "9c06f4c8-ae6e-43a9-9eda-a1452d81e17f",
"name": "메모지1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-320,
-40
],
"parameters": {
"width": 380,
"height": 860,
"content": "## Speech Transcription API Endpoint\n\nThis workflow exposes a webhook that transcribes any audio file sent to it.\n\n**How to use**\n\n1. **Add credentials:** Select the **Transcribe with OpenAI** node and add your OpenAI API key.\n2. **Get your endpoint URL:**\n\n * Make sure the workflow is **Active**.\n\n * Click the **Webhook** node and copy the **Production URL**.\n\n3. **Connect the frontend:**\n\n * Find the sticky note labeled \"Example Frontend Code Below\". Copy the code from the note beneath it.\n\n * In the code, replace the `YOUR WEBHOOK URL` placeholder with the URL you copied in step 2.\n\n\nThe provided snippet below is a great starting point. Feel free to adapt it and build the interface you need!\n\n"
},
"typeVersion": 1
},
{
"id": "b92cc8ab-64c9-4b24-a222-aa542b4bb710",
"name": "메모지2",
"type": "n8n-nodes-base.stickyNote",
"position": [
120,
380
],
"parameters": {
"color": 4,
"width": 580,
"height": 300,
"content": "## Example Frontend Code Below**\nThe sticky note directly below contains a complete HTML file that creates a functional audio recording interface. It's a plug-and-play example to demonstrate how to call this webhook.\n\n**To use it:**\n\n1. Copy the entire code block from the note below.\n2. Save it as an `.html` file.\n3. **Remember to replace** the `YOUR WEBHOOK URL` placeholder inside the code with your actual URL from the Webhook node.\n4. Open the file in your browser to test."
},
"typeVersion": 1
},
{
"id": "883ba3ee-2a32-477f-8493-da931847a9cb",
"name": "메모지3",
"type": "n8n-nodes-base.stickyNote",
"position": [
120,
-40
],
"parameters": {
"color": 7,
"width": 280,
"height": 360,
"content": "## The webhook to call from your app\nPOST the audio as \"audio_file\" to this webhook to start the workflow."
},
"typeVersion": 1
},
{
"id": "40e86f5a-c472-4801-9235-3a2f8e3b0088",
"name": "메모지4",
"type": "n8n-nodes-base.stickyNote",
"position": [
460,
-40
],
"parameters": {
"color": 7,
"width": 280,
"height": 360,
"content": "## AI transcription with OpenAI GPT4o-mini transcribe"
},
"typeVersion": 1
},
{
"id": "4d9d11f7-ebfa-4277-bf41-9070b6d052b1",
"name": "메모지5",
"type": "n8n-nodes-base.stickyNote",
"position": [
1020,
-20
],
"parameters": {
"color": 7,
"width": 280,
"height": 340,
"content": "## Sending the transcript back to your app\nYour app should expect the key \"Transcript\" in the body of the webhook response."
},
"typeVersion": 1
}
],
"connections": {
"26543502-9e91-4d70-af12-df78ac5ba630": {
"main": [
[
{
"node": "e5de7e17-64a7-4466-a381-0dfb2e9d9711",
"type": "main",
"index": 0
}
]
]
},
"6b7dd876-ed21-47f6-877b-d6c45f8bc9b3": {
"main": [
[
{
"node": "26543502-9e91-4d70-af12-df78ac5ba630",
"type": "main",
"index": 0
}
]
]
},
"d12de568-f2b8-4757-b45f-f79bc579ee36": {
"main": [
[
{
"node": "6b7dd876-ed21-47f6-877b-d6c45f8bc9b3",
"type": "main",
"index": 0
}
]
]
}
}
}
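For reference, the "Transcribe with OpenAI" HTTP Request node above is equivalent to calling OpenAI's transcription endpoint directly. Below is a minimal sketch of that call, assuming an OPENAI_API_KEY environment variable (inside n8n, the node authenticates through its stored openAiApi credential instead).

// Direct equivalent of the HTTP Request node (Node.js 18+).
import { readFile } from "node:fs/promises";

async function transcribeDirect(path) {
  // Same multipart fields the node sends: the binary "file" plus the "model" name.
  const form = new FormData();
  form.append("file", new Blob([await readFile(path)], { type: "audio/webm" }), "audio.webm");
  form.append("model", "gpt-4o-mini-transcribe");

  const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` }, // assumed env var
    body: form,
  });
  if (!res.ok) throw new Error(`OpenAI error: HTTP ${res.status}`);

  // OpenAI returns the transcript under "text"; the Set node copies it into "Transcript".
  const { text } = await res.json();
  return text;
}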
Frequently Asked Questions

How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, choose "Import from JSON", paste the configuration, and adjust the credential settings as needed.
Which scenarios is this workflow suited for?
Intermediate level, suited to Document Extraction and Multimodal AI scenarios.
Is it paid?
The workflow itself is completely free to import and use. However, third-party services it relies on (such as the OpenAI API) may require you to pay for your own usage.
Related Workflows

- Multi-step document approval and audit workflow: automate multi-step document approval with Supabase and Gmail. Nodes: If, Set, Form. 38 nodes · Alok Kumar · Document Extraction
- Extract text from images and PDFs to Markdown with Mistral OCR via Telegram. Nodes: If, Set, Code. 40 nodes · Rostislav · Document Extraction
- Automatically document and back up workflows with GPT-4 and Airtable. Nodes: If, N8n, Set. 38 nodes · Guillaume Duvernay · AI Summarization
- Generate fact-based articles with Lookio and OpenAI GPT. Nodes: Set, Split Out, Aggregate. 19 nodes · Guillaume Duvernay · Content Creation
- Generate fact-based articles with Super RAG and GPT-5. Nodes: Set, Split Out, Aggregate. 19 nodes · Guillaume Duvernay · AI RAG
- Generate scheduled AI-driven data summaries in Slack with Super Assistant. Nodes: Set, Slack, Http Request. 9 nodes · Guillaume Duvernay · AI RAG