POST
/api/v1/evals
Create and run an evaluation
Creates a new eval run and executes it synchronously against the specified model. The provider API key must be configured in Settings. Returns the full results including per-case scores and an overall summary.
Authentication
Send Authorization: Bearer YOUR_API_KEY on every request. Generate API keys at /dashboard/api-keys.
Request body (required)
Example
{
"projectId": "00000000-0000-0000-0000-000000000000",
"name": "qa-regression-v2",
"model": "gpt-4o",
"prompt": "Answer the question: {{input}}",
"cases": [
{
"input": "<Test input>",
"expectedOutput": "<Expected output for comparison>"
}
],
"scorers": [
"exact-match",
"faithfulness",
"relevance"
]
}
Schema
{
"application/json": {
"schema": {
"type": "object",
"required": [
"projectId",
"name",
"model",
"prompt",
"cases",
"scorers"
],
"properties": {
"projectId": {
"type": "string",
"format": "uuid",
"description": "Project ID"
},
"name": {
"type": "string",
"description": "Name for this eval run",
"example": "qa-regression-v2"
},
"model": {
"type": "string",
"description": "Model identifier",
"example": "gpt-4o"
},
"prompt": {
"type": "string",
"description": "Prompt template with {{input}} placeholder",
"example": "Answer the question: {{input}}"
},
"cases": {
"type": "array",
"items": {
"type": "object",
"required": [
"input"
],
"properties": {
"input": {
"type": "string",
"description": "Test input"
},
"expectedOutput": {
"type": "string",
"description": "Expected output for comparison"
}
}
},
"description": "Array of test cases"
},
"scorers": {
"type": "array",
"items": {
"type": "string"
},
"description": "Scorer names to apply",
"example": [
"exact-match",
"faithfulness",
"relevance"
]
}
}
}
}
}
Response
201 example
{
"success": true,
"data": {
"id": "00000000-0000-0000-0000-000000000000",
"name": "string",
"status": "passed",
"model": "string",
"score": 0,
"results": [
{
"input": "string",
"expectedOutput": "string",
"actualOutput": "string",
"score": 0,
"passed": false,
"latency": 0,
"tokenUsage": {},
"scorerResults": {}
}
],
"summary": {
"totalCases": 0,
"passedCases": 0,
"failedCases": 0,
"passRate": 0,
"avgScore": 0,
"totalLatency": 0
}
}
}
All status codes
201 Eval run created and executed
400 (no description)
429 (no description)
Code samples
cURL
curl -X POST \
https://evalguard.ai/api/v1/evals \
-H "Authorization: Bearer $EVALGUARD_API_KEY" \
-H "Content-Type: application/json" \
-d '{ "projectId": "00000000-0000-0000-0000-000000000000", "name": "qa-regression-v2", "model": "gpt-4o", "prompt": "Answer the question: {{input}}", "cases": [ { "input": "<Test input>", "expectedOutput": "<Expected output for comparison>" } ], "scorers": [ "exact-match", "faithfulness", "relevance" ] }'
TypeScript
import { EvalGuard } from "@evalguard/sdk";
const client = new EvalGuard({ apiKey: process.env.EVALGUARD_API_KEY });
const response = await client.request({
method: "POST",
path: "/api/v1/evals",
body: {
"projectId": "00000000-0000-0000-0000-000000000000",
"name": "qa-regression-v2",
"model": "gpt-4o",
"prompt": "Answer the question: {{input}}",
"cases": [
{
"input": "<Test input>",
"expectedOutput": "<Expected output for comparison>"
}
],
"scorers": [
"exact-match",
"faithfulness",
"relevance"
]
},
});
console.log(response);
Python
from evalguard import EvalGuard
import os
client = EvalGuard(api_key=os.environ["EVALGUARD_API_KEY"])
response = client.request(
method="POST",
path="/api/v1/evals",
body={
"projectId": "00000000-0000-0000-0000-000000000000",
"name": "qa-regression-v2",
"model": "gpt-4o",
"prompt": "Answer the question: {{input}}",
"cases": [
{
"input": "<Test input>",
"expectedOutput": "<Expected output for comparison>"
}
],
"scorers": [
"exact-match",
"faithfulness",
"relevance"
]
},
)
print(response)
Go
package main
import (
"context"
"fmt"
"os"
"github.com/evalguard/evalguard-go"
)
func main() {
client := evalguard.NewClient(os.Getenv("EVALGUARD_API_KEY"))
resp, err := client.Request(context.Background(), "POST", "/api/v1/evals", map[string]any{"projectId": "00000000-0000-0000-0000-000000000000", "name": "qa-regression-v2", "model": "gpt-4o", "prompt": "Answer the question: {{input}}", "cases": []any{map[string]any{"input": "<Test input>", "expectedOutput": "<Expected output for comparison>"}}, "scorers": []any{"exact-match", "faithfulness", "relevance"}})
if err != nil { panic(err) }
fmt.Println(resp)
}
Errors
400
429