POST /api/v1/evals

Create and run an evaluation

Creates a new eval run and starts execution in the background, returning 201 immediately with the run ID. Poll GET /api/v1/evals/{runId} for progress and results. Set external=true for imperative-logger runs, in which the SDK streams its own results via POST /evals/{runId}/results and closes the run via PATCH /evals/{runId}; for external runs the minimums on prompt, cases, and scorers are relaxed. Required role: editor. Supports Idempotency-Key.

Authentication

Send Authorization: Bearer YOUR_API_KEY on every request. Generate API keys at /dashboard/api-keys.

Request body required

Example

{
  "name": "qa-regression-v2",
  "projectId": "default",
  "model": "gpt-4o",
  "prompt": "Answer the question: {{input}}",
  "cases": [
    {
      "input": "string",
      "expectedOutput": "string"
    }
  ],
  "scorers": [
    "exact-match",
    "faithfulness",
    "relevance"
  ],
  "external": false,
  "concurrency": {
    "maxConcurrency": 1,
    "minConcurrency": 1,
    "initialConcurrency": 1
  }
}

Schema

{
  "application/json": {
    "schema": {
      "type": "object",
      "properties": {
        "name": {
          "type": "string",
          "minLength": 1,
          "maxLength": 100,
          "example": "qa-regression-v2"
        },
        "projectId": {
          "type": "string",
          "minLength": 1,
          "description": "Project UUID (or 'default' for MVP). Not strictly UUID-validated."
        },
        "model": {
          "type": "string",
          "minLength": 1,
          "example": "gpt-4o"
        },
        "prompt": {
          "type": "string",
          "description": "Templated instruction with {{input}} placeholder. Required for server-run evals (external=false); may be empty for external runs.",
          "example": "Answer the question: {{input}}"
        },
        "cases": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "input": {
                "type": "string",
                "minLength": 1
              },
              "expectedOutput": {
                "type": "string"
              }
            },
            "required": [
              "input"
            ],
            "additionalProperties": false
          },
          "description": "Test cases. At least 1 required for server-run evals."
        },
        "scorers": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Scorer names. At least 1 required for server-run evals.",
          "example": [
            "exact-match",
            "faithfulness",
            "relevance"
          ]
        },
        "external": {
          "type": "boolean",
          "description": "External (imperative-logger) mode. When true the server creates the run in 'running' state and does NOT execute the model; the caller streams results out-of-band. Relaxes the prompt/cases/scorers minimums."
        },
        "concurrency": {
          "type": "object",
          "properties": {
            "maxConcurrency": {
              "type": "integer",
              "minimum": 1,
              "maximum": 50
            },
            "minConcurrency": {
              "type": "integer",
              "minimum": 1,
              "maximum": 50
            },
            "initialConcurrency": {
              "type": "integer",
              "minimum": 1,
              "maximum": 50
            }
          },
          "additionalProperties": false
        }
      },
      "required": [
        "name",
        "projectId",
        "model",
        "prompt",
        "cases",
        "scorers"
      ],
      "additionalProperties": false
    }
  }
}

Response

201 example

{
  "success": true,
  "data": {
    "id": "00000000-0000-0000-0000-000000000000",
    "status": "running",
    "totalTests": 0,
    "model": "string",
    "external": false,
    "message": "string"
  }
}

All status codes

201 Eval run created; background execution started (or external run opened).

400 Bad Request (no description)

401 Unauthorized (no description)

404 Not Found (no description)

429 Too Many Requests (no description)

Code samples

cURL

# Create an eval run. The JSON payload is wrapped in single quotes, so it must
# not contain any single quotes itself. "default" is the project ID the API
# accepts in place of a project UUID.
curl -X POST \
  https://evalguard.ai/api/v1/evals \
  -H "Authorization: Bearer $EVALGUARD_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{ "name": "qa-regression-v2", "projectId": "default", "model": "gpt-4o", "prompt": "Answer the question: {{input}}", "cases": [ { "input": "string", "expectedOutput": "string" } ], "scorers": [ "exact-match", "faithfulness", "relevance" ], "external": false, "concurrency": { "maxConcurrency": 1, "minConcurrency": 1, "initialConcurrency": 1 } }'

TypeScript

import { EvalGuard } from "@evalguard/sdk";

// Authenticate with the API key read from the EVALGUARD_API_KEY environment variable.
const client = new EvalGuard({ apiKey: process.env.EVALGUARD_API_KEY });

// Create the eval run. "default" is the project ID the API accepts in place of
// a project UUID; replace it with your own project's UUID as needed.
const response = await client.request({
  method: "POST",
  path: "/api/v1/evals",
  body: {
    name: "qa-regression-v2",
    projectId: "default",
    model: "gpt-4o",
    prompt: "Answer the question: {{input}}",
    cases: [
      {
        input: "string",
        expectedOutput: "string",
      },
    ],
    scorers: ["exact-match", "faithfulness", "relevance"],
    external: false,
    concurrency: {
      maxConcurrency: 1,
      minConcurrency: 1,
      initialConcurrency: 1,
    },
  },
});
console.log(response);

Python

from evalguard import EvalGuard
import os

# Authenticate with the API key from the EVALGUARD_API_KEY environment
# variable; os.environ[...] raises KeyError if the variable is unset.
client = EvalGuard(api_key=os.environ["EVALGUARD_API_KEY"])

# Create the eval run. "default" is the project ID the API accepts in place of
# a project UUID; replace it with your own project's UUID as needed.
response = client.request(
    method="POST",
    path="/api/v1/evals",
    body={
        "name": "qa-regression-v2",
        "projectId": "default",
        "model": "gpt-4o",
        "prompt": "Answer the question: {{input}}",
        "cases": [
            {
                "input": "string",
                "expectedOutput": "string",
            },
        ],
        "scorers": [
            "exact-match",
            "faithfulness",
            "relevance",
        ],
        "external": False,
        "concurrency": {
            "maxConcurrency": 1,
            "minConcurrency": 1,
            "initialConcurrency": 1,
        },
    },
)
print(response)

package main

import (
	"context"
	"fmt"
	"net/http"
	"os"
	"strings"
)

// main sends the documented example payload to POST /api/v1/evals and prints
// the HTTP status line of the response.
//
// The API key is read from the EVALGUARD_API_KEY environment variable and sent
// as a Bearer token. Any failure to build or send the request panics.
func main() {
	// "default" is the project ID the API accepts in place of a project UUID.
	body := strings.NewReader(`{"name":"qa-regression-v2","projectId":"default","model":"gpt-4o","prompt":"Answer the question: {{input}}","cases":[{"input":"string","expectedOutput":"string"}],"scorers":["exact-match","faithfulness","relevance"],"external":false,"concurrency":{"maxConcurrency":1,"minConcurrency":1,"initialConcurrency":1}}`)

	// Check the construction error instead of discarding it: a nil request
	// would otherwise cause a nil-pointer panic on the Header.Set calls below.
	req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, "https://evalguard.ai/api/v1/evals", body)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer "+os.Getenv("EVALGUARD_API_KEY"))
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	// Close the body so the underlying connection can be released.
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}

Errors

400 · 401 · 404 · 429

Request body required

Response

Code samples

Errors

Other Evals endpoints