Bewertungsmetriken: Zusammenfassung

Name: Bewertungsmetriken: Zusammenfassung
Rating: 4.5 (10 reviews)
Author: Jimleuk

Experte

Dies ist ein Automatisierungsworkflow aus den Bereichen Engineering und KI mit 17 Nodes. Hauptsächlich werden die Nodes Set, Webhook, Evaluation, GoogleDrive und ExtractFromFile sowie weitere Nodes verwendet, kombiniert mit KI-Technologie für intelligente Automatisierung. Workflow-Titel: Bewertungsmetriken: Zusammenfassung

Voraussetzungen

• HTTP Webhook-Endpunkt (wird von n8n automatisch generiert)
• Google Drive API-Anmeldedaten
• Google Sheets API-Anmeldedaten
• OpenAI API Key
• Google Gemini API Key

Verwendete Nodes (17)

OutputParserStructured

Kategorie

Engineering

Künstliche Intelligenz

Workflow-Vorschau

Visualisierung der Node-Verbindungen, mit Zoom und Pan

Wird ausgewertet?

OpenAI-Chat-Modell

Beim Abrufen einer Datensatzzeile

Auf Benutzer antworten

LLM

Ausgabe

Ausgaben setzen

Metriken setzen

Transkript herunterladen

Zusammenfassungs-Agent

Aus Datei extrahieren

Webhook-Trigger

Gdrive-URL abrufen

Zusammenfassung auswerten

React Flow

Workflow exportieren

Kopieren Sie die folgende JSON-Konfiguration und importieren Sie sie in n8n

{
  "meta": {
    "instanceId": "408f9fb9940c3cb18ffdef0e0150fe342d6e655c3a9fac21f0f644e8bedabcd9",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "68eca3c1-4f42-4358-8c3e-f9730a960af9",
      "name": "Wird ausgewertet?",
      "type": "n8n-nodes-base.evaluation",
      "position": [
        860,
        100
      ],
      "parameters": {
        "operation": "checkIfEvaluating"
      },
      "typeVersion": 4.6
    },
    {
      "id": "dc7ea36c-c18c-4557-ba47-866c56460a39",
      "name": "OpenAI-Chat-Modell",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        468,
        290
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini",
          "cachedResultName": "gpt-4.1-mini"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "8gccIjcuf3gvaoEr",
          "name": "OpenAi account"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "fb639d23-2b51-4d5c-94a4-4356825bba52",
      "name": "Beim Abrufen einer Datensatzzeile",
      "type": "n8n-nodes-base.evaluationTrigger",
      "position": [
        -280,
        170
      ],
      "parameters": {
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 82338773,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit#gid=82338773",
          "cachedResultName": "Summarization"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=drivesdk",
          "cachedResultName": "96. Evaluations Test"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "XHvC7jIRR8A2TlUl",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.6
    },
    {
      "id": "cf8646b6-425b-4e34-89c1-50e6fb76189d",
      "name": "Auf Benutzer antworten",
      "type": "n8n-nodes-base.noOp",
      "position": [
        1100,
        280
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "83cf1608-7c98-43fa-971c-ba38017cc37f",
      "name": "LLM",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1140,
        120
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "dSxo6ns5wn658r8N",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "5f688735-bbb7-4906-9c4b-f1069d095f05",
      "name": "Ausgabe",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        1260,
        120
      ],
      "parameters": {
        "jsonSchemaExample": "{\n  \"rating\": 1,\n  \"reason\": \"Tell me the reason for being lonely\"\n}"
      },
      "typeVersion": 1.2
    },
    {
      "id": "8a65a118-7b26-446f-92e2-6884386c2bcf",
      "name": "Ausgaben setzen",
      "type": "n8n-nodes-base.evaluation",
      "position": [
        1480,
        60
      ],
      "parameters": {
        "outputs": {
          "values": [
            {
              "outputName": "score",
              "outputValue": "={{ $json.output.rating }}"
            },
            {
              "outputName": "score_reason",
              "outputValue": "={{ $json.output.reason }}"
            }
          ]
        },
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 82338773,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit#gid=82338773",
          "cachedResultName": "Summarization"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=drivesdk",
          "cachedResultName": "96. Evaluations Test"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "XHvC7jIRR8A2TlUl",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.6
    },
    {
      "id": "883908c4-e08f-4492-abe9-a4559ad5a217",
      "name": "Metriken setzen",
      "type": "n8n-nodes-base.evaluation",
      "position": [
        1700,
        60
      ],
      "parameters": {
        "metrics": {
          "assignments": [
            {
              "id": "34913fdd-66e9-4581-b0bd-aa564d1a5c77",
              "name": "score",
              "type": "number",
              "value": "={{ $json.output.rating }}"
            }
          ]
        },
        "operation": "setMetrics"
      },
      "typeVersion": 4.6
    },
    {
      "id": "f6e24007-39e5-47e4-88fc-a7bf3a994fa6",
      "name": "Transkript herunterladen",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -60,
        70
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $json.input }}"
        },
        "options": {},
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "yOwz41gMQclOadgu",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "767a7dc0-62e7-47b8-a9eb-84a399acdf1a",
      "name": "Zusammenfassungs-Agent",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        380,
        70
      ],
      "parameters": {
        "text": "={{ $json.data }}",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "Summarise the top 5 highlights of this video using the provided transcript."
            }
          ]
        },
        "promptType": "define"
      },
      "typeVersion": 1.7
    },
    {
      "id": "c1390261-2e11-4d8c-a609-76541c3dcdf0",
      "name": "Aus Datei extrahieren",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        160,
        70
      ],
      "parameters": {
        "options": {},
        "operation": "text"
      },
      "typeVersion": 1
    },
    {
      "id": "a5387932-2f2f-40b7-9811-ab856417986f",
      "name": "Webhook-Trigger",
      "type": "n8n-nodes-base.webhook",
      "position": [
        -500,
        -30
      ],
      "webhookId": "088cd101-9dbc-46f2-899a-d2d91c15c1e5",
      "parameters": {
        "path": "088cd101-9dbc-46f2-899a-d2d91c15c1e5",
        "options": {}
      },
      "typeVersion": 2
    },
    {
      "id": "4671049e-5ae2-455a-b7f4-b1e9119ddfef",
      "name": "Gdrive-URL abrufen",
      "type": "n8n-nodes-base.set",
      "position": [
        -280,
        -30
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "504e6b0b-77b9-4ec9-b2f9-3dbbb6bce953",
              "name": "input",
              "type": "string",
              "value": "={{ $json.body.gdrive_url }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "c4a8cdb7-b462-412e-92fe-82c278375e9c",
      "name": "Zusammenfassung auswerten",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        1100,
        -40
      ],
      "parameters": {
        "text": "=# Inputs and AI-generated Response\n### Transcript\n{{ $('Aus Datei extrahieren').item.json.data }}\n\n## AI-generated Response\n{{ $('Zusammenfassungs-Agent').item.json.text }}\n",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "=# Instruction\nYou are an expert evaluator. Your task is to evaluate the quality of the responses generated by AI models.\nWe will provide you with the user input and an AI-generated responses.\nYou should first read the user input carefully for analyzing the task, and then evaluate the quality of the responses based on the Criteria provided in the Evaluation section below.\nYou will assign the response a rating following the Rating Rubric and Evaluation Steps. Give step-by-step explanations for your rating, and only choose ratings from the Rating Rubric.\n\n# Evaluation\n## Metric Definition\nYou will be assessing summarization quality, which measures the overall ability to summarize text. Pay special attention to length constraints, such as in X words or in Y sentences. The instruction for performing a summarization task and the context to be summarized are provided in the user prompt. The response should be shorter than the text in the context. The response should not contain information that is not present in the context.\n\n## Criteria\nInstruction following: The response demonstrates a clear understanding of the summarization task instructions, satisfying all of the instruction's requirements.\nGroundedness: The response contains information included only in the context. The response does not reference any outside information.\nConciseness: The response summarizes the relevant details in the original text without a significant loss in key information without being too verbose or terse.\nFluency: The response is well-organized and easy to read.\n\n## Rating Rubric\n5: (Very good). The summary follows instructions, is grounded, is concise, and fluent.\n4: (Good). The summary follows instructions, is grounded, concise, and fluent.\n3: (Ok). The summary mostly follows instructions, is grounded, but is not very concise and is not fluent.\n2: (Bad). The summary is grounded, but does not follow the instructions.\n1: (Very bad). The summary is not grounded.\n\n## Evaluation Steps\nSTEP 1: Assess the response in aspects of instruction following, groundedness, conciseness, and verbosity according to the criteria.\nSTEP 2: Score based on the rubric."
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "f994670b-3c65-434e-9bf1-35c83fd0267c",
      "name": "Notizzettel1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -580,
        -240
      ],
      "parameters": {
        "color": 7,
        "width": 1280,
        "height": 700,
        "content": "## 1. Setup Your AI Workflow to Use Evaluations\n[Learn more about the Evaluations Trigger](https://docs.n8n.io/integrations/builtin/?utm_source=n8n_app&utm_medium=node_settings_modal-credential_link&utm_campaign=n8n-nodes-base.evaluationTrigger)\n\nIn this scenario, we're evaluating the LLM's ability to summarise Youtube Video Transcripts. The transcripts are stored remotely in Google Drive and the expected input is the file's URL."
      },
      "typeVersion": 1
    },
    {
      "id": "87506ff3-c677-4d61-963c-26ff5bcac1e6",
      "name": "Notizzettel2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        720,
        -240
      ],
      "parameters": {
        "color": 7,
        "width": 1160,
        "height": 700,
        "content": "## 2. Summarization: Is the AI's summary grounded in source material?\n[Learn more about the Evaluations Node](https://docs.n8n.io/integrations/builtin/?utm_source=n8n_app&utm_medium=node_settings_modal-credential_link&utm_campaign=n8n-nodes-base.evaluation)\n\nFor this evaluation, we simply want to check if the Agent's answer was grounded in the incoming transcript.\nA higher score represents a greater alignment between the transcript and the expected output, indicating that the LLM is effectively sourcing relevant and accurate content to aid the generator in producing contextually appropriate responses."
      },
      "typeVersion": 1
    },
    {
      "id": "3b345083-74d2-4d4b-92a4-ab22febffb4b",
      "name": "Notizzettel3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -980,
        -380
      ],
      "parameters": {
        "width": 380,
        "height": 840,
        "content": "## Try It Out!\n### This n8n template demonstrates how to calculate the evaluation metric \"Summarization\" which in this scenario, measures the LLM's accuracy and faithfulness in producing summaries which are based on an incoming transcript.\n\nThe scoring approach is adapted from [https://cloud.google.com/vertex-ai/generative-ai/docs/models/metrics-templates#pointwise_summarization_quality](https://cloud.google.com/vertex-ai/generative-ai/docs/models/metrics-templates#pointwise_summarization_quality)\n\n### How it works\n* This evaluation works best for AI summarization workflows.\n* For our scoring, we simply compare the generated response to the original transcript.\n* A key factor is to look out for information in the response which is not mentioned in the documents.\n* A high score indicates LLM adherence and alignment whereas a low score could signal an inadequate prompt or model hallucination.\n\n### Requirements\n* n8n version 1.94+\n* Check out this Google Sheet for sample data [https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=sharing](https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=sharing)\n\n\n### Need Help?\nJoin the [Discord](https://discord.com/invite/XPKeKXeB7d) or ask in the [Forum](https://community.n8n.io/)!\n\nHappy Hacking!"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "83cf1608-7c98-43fa-971c-ba38017cc37f": {
      "ai_languageModel": [
        [
          {
            "node": "c4a8cdb7-b462-412e-92fe-82c278375e9c",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "5f688735-bbb7-4906-9c4b-f1069d095f05": {
      "ai_outputParser": [
        [
          {
            "node": "c4a8cdb7-b462-412e-92fe-82c278375e9c",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "a5387932-2f2f-40b7-9811-ab856417986f": {
      "main": [
        [
          {
            "node": "4671049e-5ae2-455a-b7f4-b1e9119ddfef",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "8a65a118-7b26-446f-92e2-6884386c2bcf": {
      "main": [
        [
          {
            "node": "883908c4-e08f-4492-abe9-a4559ad5a217",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "4671049e-5ae2-455a-b7f4-b1e9119ddfef": {
      "main": [
        [
          {
            "node": "f6e24007-39e5-47e4-88fc-a7bf3a994fa6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "68eca3c1-4f42-4358-8c3e-f9730a960af9": {
      "main": [
        [
          {
            "node": "c4a8cdb7-b462-412e-92fe-82c278375e9c",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "cf8646b6-425b-4e34-89c1-50e6fb76189d",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "767a7dc0-62e7-47b8-a9eb-84a399acdf1a": {
      "main": [
        [
          {
            "node": "68eca3c1-4f42-4358-8c3e-f9730a960af9",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c1390261-2e11-4d8c-a609-76541c3dcdf0": {
      "main": [
        [
          {
            "node": "767a7dc0-62e7-47b8-a9eb-84a399acdf1a",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "dc7ea36c-c18c-4557-ba47-866c56460a39": {
      "ai_languageModel": [
        [
          {
            "node": "767a7dc0-62e7-47b8-a9eb-84a399acdf1a",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "f6e24007-39e5-47e4-88fc-a7bf3a994fa6": {
      "main": [
        [
          {
            "node": "c1390261-2e11-4d8c-a609-76541c3dcdf0",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c4a8cdb7-b462-412e-92fe-82c278375e9c": {
      "main": [
        [
          {
            "node": "8a65a118-7b26-446f-92e2-6884386c2bcf",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "fb639d23-2b51-4d5c-94a4-4356825bba52": {
      "main": [
        [
          {
            "node": "f6e24007-39e5-47e4-88fc-a7bf3a994fa6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

Häufig gestellte Fragen

Wie verwende ich diesen Workflow?

Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.

Für welche Szenarien ist dieser Workflow geeignet?

Dieser Workflow richtet sich an Experten und eignet sich für Szenarien in den Bereichen Engineering und Künstliche Intelligenz.

Ist es kostenpflichtig?

Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste (wie OpenAI API), die im Workflow verwendet werden, möglicherweise kostenpflichtig sind.