openapi: 3.1.0
info:
  title: modelux Proxy API
  version: 1.0.0
  description: |
    Runtime API for the modelux LLM proxy — the surface your application
    sends inference traffic to. Two wire-format-compatible families:

      - `/openai/v1/*` — drop-in for the OpenAI SDK (`baseURL`-only swap)
      - `/anthropic/v1/*` — drop-in for the Anthropic SDK

    Both families share auth, modelux extensions, and observability headers.
    The OpenAI surface routes cross-provider; the Anthropic surface routes
    cross-provider too (translating the wire shape on the way out and back).

    For body schemas, modelux mirrors the upstream API shape verbatim — see
    the official OpenAI and Anthropic specs for the request/response field
    catalog. This document focuses on what's modelux-specific:
    auth, the `X-Modelux-*` headers, the `mlx:*` request extensions, the
    response headers carrying decision metadata, and the proxy's own
    endpoints (dry-run, batches, files, Responses, etc.).

    The separate management API spec (organizations, projects, routing
    configs, budgets, etc.) is at `/openapi.yaml`.
  contact:
    name: modelux
    url: https://modelux.ai
  license:
    name: Proprietary
externalDocs:
  description: modelux developer docs
  url: https://modelux.ai/docs
servers:
  - url: https://api.modelux.ai
    description: Production
  - url: http://localhost:8080
    description: Local dev (overmind / docker compose)

# Global auth: top-level `security` lists the two schemes as alternatives
# (OR semantics per OpenAPI). NOTE(review): per the XApiKeyAuth description in
# components.securitySchemes, x-api-key is honored on `/anthropic/v1/*` only;
# `/openai/v1/*` requires Bearer. `/health` opts out via `security: []`.
security:
  - BearerAuth: []
  - XApiKeyAuth: []

# Tag taxonomy: the two "shape" tags mirror the wire-format families; the
# remaining tags group proxy-level capabilities regardless of surface.
tags:
  - name: OpenAI shape
    description: OpenAI-compatible inference endpoints (`/openai/v1/*`)
  - name: Anthropic shape
    description: Anthropic-compatible inference endpoints (`/anthropic/v1/*`)
  - name: Batches
    description: Async batch processing (50% upstream discount)
  - name: Files
    description: Upload/reference files for batch input or message attachments
  - name: Responses
    description: OpenAI Responses API (`/openai/v1/responses`)
  - name: Routing
    description: Dry-run routing evaluation
  - name: Discovery
    description: Health, model lists

paths:
  # ─── OpenAI surface ──────────────────────────────────────────────────────

  /openai/v1/chat/completions:
    post:
      tags: [OpenAI shape]
      summary: Chat completions (cross-provider routing)
      description: |
        Drop-in for `POST https://api.openai.com/v1/chat/completions`. Same
        request/response shape; routes to whichever provider the `model`
        field (or routing config slug) resolves to. Streaming via
        `stream: true` returns SSE in OpenAI's chunk format.

        OpenAI-specific request fields pass through byte-identical to
        OpenAI-family upstreams and are silently dropped on cross-provider
        routes (Anthropic / Google / Cohere don't have equivalents):
        `response_format` (json_schema strict supported), `seed`,
        `logprobs`, `top_logprobs`, `parallel_tool_calls`.

        See [docs/api/chat-completions](https://modelux.ai/docs/api/chat-completions).
      # All X-Modelux-* request headers are optional; see components.parameters
      # for each one's semantics (routing inputs, trace grouping, dry-run, BYOK).
      parameters:
        - $ref: "#/components/parameters/XModeluxUserId"
        - $ref: "#/components/parameters/XModeluxUserTags"
        - $ref: "#/components/parameters/XModeluxTraceId"
        - $ref: "#/components/parameters/XModeluxConversationId"
        - $ref: "#/components/parameters/XModeluxDryRun"
        - $ref: "#/components/parameters/XModeluxProviderKey"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ChatCompletionRequest"
      responses:
        "200":
          description: Completion or SSE stream
          # Decision metadata on every response: which model/provider actually
          # served the call, computed cost, and the semantic-cache outcome.
          headers:
            X-Modelux-Request-Id: { $ref: "#/components/headers/RequestId" }
            X-Modelux-Model-Used: { $ref: "#/components/headers/ModelUsed" }
            X-Modelux-Provider-Used: { $ref: "#/components/headers/ProviderUsed" }
            X-Modelux-Cost-Usd: { $ref: "#/components/headers/CostUsd" }
            X-Modelux-Cache: { $ref: "#/components/headers/Cache" }
            X-Modelux-Cache-Similarity: { $ref: "#/components/headers/CacheSimilarity" }
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ChatCompletionResponse"
            text/event-stream:
              schema:
                type: string
                description: |
                  OpenAI-format SSE events (`data: {…chunk…}\n\n`) when the
                  request set `stream: true`.
        "401": { $ref: "#/components/responses/OpenAIUnauthorized" }
        "402": { $ref: "#/components/responses/BudgetExceeded" }
        "429": { $ref: "#/components/responses/OpenAIRateLimited" }
        # "5XX" is an OpenAPI 3.1 status-code range: any 500–599 response.
        "5XX": { $ref: "#/components/responses/OpenAIError" }

  /openai/v1/embeddings:
    post:
      tags: [OpenAI shape]
      summary: Embeddings
      description: Drop-in for OpenAI's `POST /v1/embeddings`. Routes
        cross-provider where the upstream supports embeddings.
      requestBody:
        required: true
        content:
          application/json:
            # Schema intentionally open — body mirrors the upstream OpenAI
            # shape verbatim (see the file-header description).
            schema:
              type: object
              additionalProperties: true
              description: |
                Standard OpenAI embeddings body (`model`, `input`, `encoding_format`,
                `dimensions`, `user`). See OpenAI's spec.
      responses:
        "200":
          description: Embedding(s)
          # No cache headers here: only the routing/cost decision metadata.
          headers:
            X-Modelux-Request-Id: { $ref: "#/components/headers/RequestId" }
            X-Modelux-Model-Used: { $ref: "#/components/headers/ModelUsed" }
            X-Modelux-Provider-Used: { $ref: "#/components/headers/ProviderUsed" }
            X-Modelux-Cost-Usd: { $ref: "#/components/headers/CostUsd" }
          content:
            application/json:
              schema:
                type: object
                additionalProperties: true
        "401": { $ref: "#/components/responses/OpenAIUnauthorized" }

  /openai/v1/images/generations:
    post:
      tags: [OpenAI shape]
      summary: Image generation
      description: Drop-in for OpenAI's image generation endpoint.
      requestBody:
        required: true
        content:
          application/json:
            # Open schema: request/response mirror OpenAI's shape verbatim.
            schema: { type: object, additionalProperties: true }
      responses:
        "200":
          description: Image(s)
          content:
            application/json: { schema: { type: object, additionalProperties: true } }

  /openai/v1/audio/speech:
    post:
      tags: [OpenAI shape]
      summary: Text-to-speech
      requestBody:
        required: true
        content:
          application/json:
            # Open schema: body mirrors OpenAI's TTS request shape verbatim.
            schema: { type: object, additionalProperties: true }
      responses:
        "200":
          description: Audio bytes (Content-Type from upstream — typically audio/mpeg or audio/opus)
          content:
            # Documented as octet-stream; the actual Content-Type is whatever
            # the upstream returned (see the 200 description above).
            application/octet-stream:
              schema: { type: string, format: binary }

  /openai/v1/audio/transcriptions:
    post:
      tags: [OpenAI shape]
      summary: Speech-to-text
      requestBody:
        required: true
        content:
          multipart/form-data:
            # Only `file` is required here; `model` and any other OpenAI
            # transcription fields pass through via additionalProperties.
            schema:
              type: object
              properties:
                file: { type: string, format: binary }
                model: { type: string }
              required: [file]
              additionalProperties: true
      responses:
        "200":
          description: Transcription
          content:
            application/json: { schema: { type: object, additionalProperties: true } }

  /openai/v1/models:
    get:
      tags: [Discovery]
      summary: List models
      description: Returns OpenAI's `{object:"list",data:[]}` envelope. Currently
        empty — modelux doesn't yet expose a curated model registry. Present so
        SDK probes via `client.models.list()` don't 404.
      responses:
        "200":
          description: Empty list envelope
          content:
            application/json:
              # `data` is always [] today (see description); items are typed
              # loosely so a future registry doesn't break the contract.
              schema:
                type: object
                properties:
                  object: { type: string, example: "list" }
                  data: { type: array, items: { type: object } }

  /openai/v1/dry-run:
    post:
      tags: [Routing]
      summary: Evaluate routing without calling the upstream
      description: |
        Returns the routing decision for a hypothetical request without
        invoking the LLM. Useful for previewing what `@my-config` would
        do against a sample prompt, or validating a routing change before
        promoting it. Same logic as `X-Modelux-Dry-Run: true` on
        `/chat/completions`, just exposed as its own endpoint.
      requestBody:
        required: true
        content:
          application/json:
            # Takes the same body as /chat/completions so a real request can
            # be previewed unmodified.
            schema:
              $ref: "#/components/schemas/ChatCompletionRequest"
      responses:
        "200":
          description: Routing decision
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DryRunResponse"

  # ─── OpenAI Responses API ────────────────────────────────────────────────

  /openai/v1/responses:
    post:
      tags: [Responses]
      summary: Create a response (OpenAI Responses API)
      description: |
        Authenticated thin passthrough to OpenAI's `POST /v1/responses`.
        OpenAI-specific surface — no cross-provider translation (the
        Responses item taxonomy has no faithful Claude / Gemini mapping).
        Sync, SSE streaming, and background mode all supported.
        See [docs/api/openai-responses](https://modelux.ai/docs/api/openai-responses).
      parameters:
        - $ref: "#/components/parameters/XModeluxProviderKey"
      requestBody:
        required: true
        content:
          application/json:
            # Open schema: passthrough surface, body forwarded as-is.
            schema: { type: object, additionalProperties: true }
      responses:
        "200":
          description: "Response object (or SSE stream when `stream: true`)"
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
            text/event-stream: { schema: { type: string } }
  /openai/v1/responses/{id}:
    # Path-level parameters apply to every operation below (merged with each
    # operation's own `parameters` per OpenAPI semantics).
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Responses]
      summary: Retrieve a stored response
      parameters:
        - name: stream
          in: query
          required: false
          schema: { type: boolean }
          description: Set `true` to replay the response as an SSE stream.
      responses:
        "200":
          description: Response object
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    delete:
      tags: [Responses]
      summary: Delete a stored response
      responses:
        "200":
          description: Deleted
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /openai/v1/responses/{id}/cancel:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    post:
      tags: [Responses]
      summary: Cancel a background response
      responses:
        "200":
          description: Cancellation accepted
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /openai/v1/responses/{id}/input_items:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Responses]
      summary: List input items for a stored response
      # Cursor pagination in OpenAI's style: `after` / `before` take item ids.
      parameters:
        - { name: limit, in: query, required: false, schema: { type: integer } }
        - { name: order, in: query, required: false, schema: { type: string, enum: [asc, desc] } }
        - { name: after, in: query, required: false, schema: { type: string } }
        - { name: before, in: query, required: false, schema: { type: string } }
      responses:
        "200":
          description: Paginated list
          content:
            application/json: { schema: { type: object, additionalProperties: true } }

  # ─── OpenAI batches + files ──────────────────────────────────────────────

  /openai/v1/batches:
    post:
      tags: [Batches]
      summary: Create a batch
      description: |
        Drop-in for OpenAI's `POST /v1/batches`. Body references an uploaded
        JSONL file via `input_file_id`. See
        [docs/api/openai-batches](https://modelux.ai/docs/api/openai-batches).
      parameters:
        - $ref: "#/components/parameters/XModeluxProviderKey"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [input_file_id, endpoint, completion_window]
              properties:
                # Id returned by POST /openai/v1/files (purpose "batch").
                input_file_id: { type: string }
                endpoint: { type: string, example: "/v1/chat/completions" }
                completion_window: { type: string, example: "24h" }
                metadata: { type: object, additionalProperties: { type: string } }
      responses:
        "200":
          description: Batch
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    get:
      tags: [Batches]
      summary: List batches
      parameters:
        - { name: limit, in: query, required: false, schema: { type: integer } }
        - { name: after, in: query, required: false, schema: { type: string } }
      responses:
        "200":
          description: Paginated list
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /openai/v1/batches/{id}:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Batches]
      summary: Retrieve a batch
      responses:
        "200":
          description: Batch
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /openai/v1/batches/{id}/cancel:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    post:
      tags: [Batches]
      summary: Cancel a batch
      responses:
        "200":
          description: Batch with status=cancelling
          content:
            application/json: { schema: { type: object, additionalProperties: true } }

  /openai/v1/files:
    post:
      tags: [Files]
      summary: Upload a file
      description: |
        `multipart/form-data` upload (typically `purpose: "batch"` for batch
        input JSONL). The proxy buffers the body once before forwarding
        because OpenAI's upload endpoint requires Content-Length.
      parameters:
        - $ref: "#/components/parameters/XModeluxProviderKey"
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required: [file, purpose]
              properties:
                file: { type: string, format: binary }
                purpose: { type: string, example: batch }
      responses:
        "200":
          description: File metadata
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    get:
      tags: [Files]
      summary: List files
      parameters:
        - { name: purpose, in: query, required: false, schema: { type: string } }
        - { name: limit, in: query, required: false, schema: { type: integer } }
        - { name: order, in: query, required: false, schema: { type: string } }
        - { name: after, in: query, required: false, schema: { type: string } }
      responses:
        "200":
          description: Paginated list
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /openai/v1/files/{id}:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Files]
      summary: Retrieve file metadata
      responses:
        "200":
          description: Metadata
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    delete:
      tags: [Files]
      summary: Delete a file
      responses:
        "200":
          description: Deleted
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /openai/v1/files/{id}/content:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Files]
      summary: Download file content
      description: |
        Streams the file body (any Content-Type — JSONL for batch results,
        binary for other purposes) via `io.Copy`. No buffering, so multi-MB
        result files don't burden the proxy.
      responses:
        "200":
          description: File bytes
          content:
            # octet-stream here is documentation shorthand — per the
            # description above, the actual Content-Type is passed through.
            application/octet-stream:
              schema: { type: string, format: binary }

  # ─── Anthropic surface ───────────────────────────────────────────────────

  /anthropic/v1/messages:
    post:
      tags: [Anthropic shape]
      summary: Messages (cross-provider routing)
      description: |
        Drop-in for Anthropic's `POST /v1/messages`. Same content-block
        format on the request and response side; routes cross-provider
        with translation. Streaming returns Anthropic's full event taxonomy
        (`message_start` → `content_block_*` → `message_delta` → `message_stop`).

        Anthropic native prompt caching (`cache_control` markers on
        messages, system, and tools) passes through verbatim — Anthropic /
        Bedrock-Claude upstreams apply the cache discount on the next
        matching request. Cache hit / write counts surface in the request
        log.

        See [docs/api/anthropic-messages](https://modelux.ai/docs/api/anthropic-messages).
      parameters:
        - $ref: "#/components/parameters/XModeluxUserId"
        - $ref: "#/components/parameters/XModeluxUserTags"
        - $ref: "#/components/parameters/XModeluxTraceId"
        - $ref: "#/components/parameters/XModeluxConversationId"
        - $ref: "#/components/parameters/XModeluxDryRun"
        - $ref: "#/components/parameters/XModeluxProviderKey"
        # Optional here even though Anthropic's own API requires it —
        # presumably the proxy supplies a default when absent; TODO confirm.
        - name: anthropic-version
          in: header
          required: false
          schema: { type: string, example: "2023-06-01" }
        - name: anthropic-beta
          in: header
          required: false
          schema: { type: string }
          description: Forwarded verbatim to upstream (Anthropic's beta tags).
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/AnthropicMessagesRequest"
      responses:
        "200":
          description: "Message (or SSE stream when `stream: true`)"
          # Same decision-metadata headers as the OpenAI surface, minus
          # Cache-Similarity.
          headers:
            X-Modelux-Request-Id: { $ref: "#/components/headers/RequestId" }
            X-Modelux-Model-Used: { $ref: "#/components/headers/ModelUsed" }
            X-Modelux-Provider-Used: { $ref: "#/components/headers/ProviderUsed" }
            X-Modelux-Cost-Usd: { $ref: "#/components/headers/CostUsd" }
            X-Modelux-Cache: { $ref: "#/components/headers/Cache" }
          content:
            application/json: { schema: { $ref: "#/components/schemas/AnthropicMessageResponse" } }
            text/event-stream: { schema: { type: string } }
        "400": { $ref: "#/components/responses/AnthropicError" }
        "401": { $ref: "#/components/responses/AnthropicError" }
        "5XX": { $ref: "#/components/responses/AnthropicError" }

  /anthropic/v1/messages/count_tokens:
    post:
      tags: [Anthropic shape]
      summary: Count tokens
      description: Forwards to Anthropic's count_tokens endpoint verbatim.
      requestBody:
        required: true
        content:
          application/json:
            # Open schema: body forwarded verbatim (same shape as /v1/messages
            # minus generation params — see Anthropic's spec).
            schema: { type: object, additionalProperties: true }
      responses:
        "200":
          description: Token count
          content:
            application/json:
              schema:
                type: object
                properties:
                  input_tokens: { type: integer }

  /anthropic/v1/models:
    get:
      tags: [Discovery]
      summary: List Anthropic models
      description: Returns Anthropic's pagination envelope (currently empty).
      responses:
        "200":
          description: Pagination envelope
          content:
            application/json:
              # Mirrors /openai/v1/models: present so SDK probes don't 404.
              schema:
                type: object
                properties:
                  data: { type: array, items: { type: object } }
                  has_more: { type: boolean }

  # ─── Anthropic batches ───────────────────────────────────────────────────

  /anthropic/v1/messages/batches:
    post:
      tags: [Batches]
      summary: Create a message batch (Anthropic)
      description: |
        Drop-in for Anthropic's `POST /v1/messages/batches`. Body is an
        array of independent message requests, each with a caller-supplied
        `custom_id`. See
        [docs/api/anthropic-batches](https://modelux.ai/docs/api/anthropic-batches).
      parameters:
        - $ref: "#/components/parameters/XModeluxProviderKey"
        - { name: anthropic-version, in: header, required: false, schema: { type: string } }
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [requests]
              properties:
                requests:
                  type: array
                  minItems: 1
                  items:
                    type: object
                    required: [custom_id, params]
                    properties:
                      # Caller-supplied key used to correlate each result line
                      # in the JSONL download with its sub-request.
                      custom_id: { type: string }
                      params:
                        type: object
                        description: Inner request body (same shape as `/anthropic/v1/messages`).
                        additionalProperties: true
      responses:
        "200":
          description: Batch
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    get:
      tags: [Batches]
      summary: List message batches (Anthropic)
      # Anthropic-style cursor pagination (`after_id` / `before_id`), unlike
      # the OpenAI surface's `after` / `before`.
      parameters:
        - { name: limit, in: query, required: false, schema: { type: integer } }
        - { name: after_id, in: query, required: false, schema: { type: string } }
        - { name: before_id, in: query, required: false, schema: { type: string } }
      responses:
        "200":
          description: Paginated list
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /anthropic/v1/messages/batches/{id}:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Batches]
      summary: Retrieve a message batch
      responses:
        "200":
          description: Batch
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    delete:
      tags: [Batches]
      summary: Delete a message batch
      description: Allowed only after the batch has reached a terminal state.
      responses:
        "200":
          description: Deleted
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /anthropic/v1/messages/batches/{id}/cancel:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    post:
      tags: [Batches]
      summary: Cancel a message batch
      responses:
        "200":
          description: Batch with processing_status=canceling
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /anthropic/v1/messages/batches/{id}/results:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Batches]
      summary: Download batch results (JSONL stream)
      description: |
        Streams the JSONL results file (one line per sub-request, keyed by
        the `custom_id` from the original request). Content-Type passed
        through from upstream. Streaming via `io.Copy` so arbitrarily large
        result files don't burden the proxy.
      responses:
        "200":
          description: JSONL bytes
          content:
            application/x-jsonlines:
              schema: { type: string }

  # ─── Anthropic files ─────────────────────────────────────────────────────

  /anthropic/v1/files:
    post:
      tags: [Files]
      summary: Upload a file (Anthropic, beta)
      description: |
        `multipart/form-data` upload. The proxy forwards your
        `anthropic-beta` header verbatim so the SDK's beta-tag declaration
        reaches the upstream untouched. See
        [docs/api/anthropic-files](https://modelux.ai/docs/api/anthropic-files).
      # Unlike /anthropic/v1/messages, `anthropic-beta` is required on every
      # operation in this section (the Files API is gated behind the beta tag).
      parameters:
        - $ref: "#/components/parameters/XModeluxProviderKey"
        - { name: anthropic-beta, in: header, required: true, schema: { type: string }, description: "Required for the Files API beta — pass the tag your SDK declares." }
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required: [file]
              properties:
                file: { type: string, format: binary }
      responses:
        "200":
          description: File metadata
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    get:
      tags: [Files]
      summary: List files (Anthropic)
      parameters:
        - { name: anthropic-beta, in: header, required: true, schema: { type: string } }
        - { name: limit, in: query, required: false, schema: { type: integer } }
        - { name: after_id, in: query, required: false, schema: { type: string } }
        - { name: before_id, in: query, required: false, schema: { type: string } }
      responses:
        "200":
          description: Paginated list
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /anthropic/v1/files/{id}:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Files]
      summary: Retrieve file metadata (Anthropic)
      parameters:
        - { name: anthropic-beta, in: header, required: true, schema: { type: string } }
      responses:
        "200":
          description: Metadata
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
    delete:
      tags: [Files]
      summary: Delete a file (Anthropic)
      parameters:
        - { name: anthropic-beta, in: header, required: true, schema: { type: string } }
      responses:
        "200":
          description: Deleted
          content:
            application/json: { schema: { type: object, additionalProperties: true } }
  /anthropic/v1/files/{id}/content:
    parameters:
      - { name: id, in: path, required: true, schema: { type: string } }
    get:
      tags: [Files]
      summary: Download file content (Anthropic)
      description: |
        Restricted to API-generated files (e.g., outputs from computer-use
        tool calls); user-uploaded files come back with a `400 invalid_request_error: File is not downloadable`. The proxy passes that
        error envelope through verbatim.
      parameters:
        - { name: anthropic-beta, in: header, required: true, schema: { type: string } }
      responses:
        "200":
          description: File bytes
          content:
            application/octet-stream:
              schema: { type: string, format: binary }
        "400":
          description: Not downloadable (Anthropic restriction)
          content:
            application/json: { schema: { $ref: "#/components/schemas/AnthropicError" } }

  # ─── Health ──────────────────────────────────────────────────────────────

  /health:
    get:
      tags: [Discovery]
      summary: Liveness probe
      # Empty array overrides the document-level `security` — this endpoint
      # is intentionally unauthenticated.
      security: []
      responses:
        "200":
          description: ok
          content:
            text/plain:
              schema: { type: string, example: "ok" }

components:
  # Both schemes carry the same modelux key; they differ only in transport
  # (Authorization header vs. x-api-key) and in which surface honors them.
  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer
      description: |
        modelux API key (`mlx_sk_*`) in the `Authorization: Bearer …` header.
        The Anthropic surface also accepts the same key as `x-api-key: mlx_sk_…`
        so the official Anthropic SDKs work as drop-in clients.
    XApiKeyAuth:
      type: apiKey
      in: header
      name: x-api-key
      description: |
        modelux API key (`mlx_sk_*`) — accepted on `/anthropic/v1/*` only,
        for SDK drop-in compatibility. On `/openai/v1/*` use Bearer auth.

  # Reusable X-Modelux-* request headers. All optional; shared by both the
  # OpenAI and Anthropic inference surfaces.
  parameters:
    XModeluxUserId:
      name: X-Modelux-User-Id
      in: header
      required: false
      schema: { type: string }
      description: |
        End-user identifier — used as the routing input for
        sticky-bucketing policies (A/B tests, percentage rollouts) and
        for per-user spend analytics. Overrides the OpenAI-shape `user`
        field in the body when both are present.
    XModeluxUserTags:
      name: X-Modelux-User-Tags
      in: header
      required: false
      schema: { type: string, example: "tier=premium,cohort=beta" }
      description: |
        Comma-separated `key=value` pairs attached to the request log
        and available as `tags.<key>` to routing rules. Use for
        per-tenant / per-feature analytics breakdowns.
    XModeluxTraceId:
      name: X-Modelux-Trace-Id
      in: header
      required: false
      schema: { type: string }
      description: |
        Groups LLM calls that serve a single agent run / user turn
        (the fan-out of tool loops or query-rewrite → retrieve →
        synthesize chains). Name matches OpenTelemetry / LangSmith /
        Langfuse vocabulary. Stored on `request_logs.trace_id` and
        surfaces in the dashboard's logs view. One conversation
        contains many trace_ids.

        Body-metadata fallback: when this header isn't set, modelux
        reads `metadata.trace_id` from the request body (both the
        OpenAI-compatible `metadata` map on chat completions and the
        Anthropic `metadata` object). The metadata is stripped before
        the outbound provider call.
    XModeluxConversationId:
      name: X-Modelux-Conversation-Id
      in: header
      required: false
      schema: { type: string }
      description: |
        Groups turns in a long-lived thread (a chatbot session
        spanning many user↔assistant exchanges). One conversation
        contains many trace_ids; one trace_id contains many requests.
        Stored on `request_logs.conversation_id`.

        Body-metadata fallback: same as `X-Modelux-Trace-Id` — if
        the header is absent, modelux reads `metadata.conversation_id`
        from the request body.
    XModeluxDryRun:
      name: X-Modelux-Dry-Run
      in: header
      required: false
      # Quoted enum values are deliberate: these are header strings, not
      # YAML/JSON booleans.
      schema: { type: string, enum: ["true", "1"] }
      description: |
        When set, evaluate routing without calling the upstream. The
        proxy returns the routing decision (policy, candidates, selected
        target) instead of an LLM response. Useful for previewing what
        a routing config would do against a sample request.
    XModeluxProviderKey:
      name: X-Modelux-Provider-Key
      in: header
      required: false
      schema: { type: string }
      description: |
        BYOK passthrough — provider key (sk-…, AIza…, etc.) used for
        this single call. Wins over the org's stored credential. Never
        stored. Only honored on direct calls (`provider/model` or known
        bare prefix), not `@config` slugs. The base URL still comes
        from any stored credential, so self-hosted endpoints work with
        BYOK.

  # Response headers attached by the proxy carrying per-request decision
  # metadata (which model/provider actually served the call, computed cost,
  # semantic-cache outcome).
  headers:
    RequestId:
      description: modelux's request log row id (UUID). Use to look up the row in the dashboard or via `GET /api/manage/v1/logs/{id}`.
      schema: { type: string }
    ModelUsed:
      description: The model that actually served the request (after fallbacks / cascades). May differ from the body's `model` field.
      schema: { type: string }
    ProviderUsed:
      description: The upstream provider that served the request (`openai`, `anthropic`, `google`, `bedrock`, `azure`, etc.).
      schema: { type: string }
    CostUsd:
      description: Computed cost in USD for this request (prompt + completion + cache adjustments).
      # OpenAPI 3.1 schema objects use JSON Schema 2020-12, which deprecates
      # the singular `example` keyword in favor of the `examples` array.
      schema: { type: string, examples: ["0.002134"] }
    Cache:
      description: |
        modelux's semantic-cache outcome. `HIT` = served from cache, no upstream call;
        `MISS` = stored if cache enabled.
      schema: { type: string, enum: [HIT, MISS] }
    CacheSimilarity:
      description: Cosine similarity (0.0–1.0) for cache hits.
      schema: { type: string, examples: ["0.9876"] }

  # Reusable error envelopes, one per wire shape plus the proxy's own
  # budget-cap rejection.
  responses:
    OpenAIUnauthorized:
      description: Missing or invalid modelux API key
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/OpenAIError"
    OpenAIRateLimited:
      description: Per-key rate limit exceeded
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/OpenAIError"
    OpenAIError:
      description: OpenAI-shape error envelope
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/OpenAIError"
    AnthropicError:
      description: Anthropic-shape error envelope
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/AnthropicError"
    BudgetExceeded:
      description: Hard-cap budget exceeded
      content:
        application/json:
          schema:
            type: object
            properties:
              error:
                type: object
                properties:
                  type:
                    type: string
                    example: "budget_exceeded"
                  message:
                    type: string
                  budget:
                    type: object
                    properties:
                      name:
                        type: string
                      spend_usd:
                        type: string
                      cap_usd:
                        type: string
                      period:
                        type: string
                      retry_after:
                        type: integer
                        description: "Seconds until the budget resets"

  schemas:
    ChatCompletionRequest:
      type: object
      required: [model, messages]
      properties:
        model:
          type: string
          description: |
            Model name (`gpt-4o-mini`, `claude-sonnet-4-5`, `gemini-2.5-flash`,
            `provider/model` form, or a routing config slug like `@production`).
        messages:
          type: array
          items: { type: object, additionalProperties: true }
        stream: { type: boolean }
        temperature: { type: number }
        max_tokens: { type: integer }
        max_completion_tokens: { type: integer }
        tools:
          type: array
          items: { type: object, additionalProperties: true }
        tool_choice:
          oneOf:
            - { type: string }
            - { type: object, additionalProperties: true }
        response_format:
          type: object
          additionalProperties: true
          description: OpenAI structured-output spec (json_object or json_schema with strict mode).
        seed:
          type: integer
          description: Reproducible-sampling seed (OpenAI-family upstreams only).
        logprobs:
          type: boolean
          description: Include per-token log-probabilities (OpenAI-family upstreams only).
        top_logprobs:
          type: integer
          # Encode the documented 0–20 contract so validators can enforce it.
          minimum: 0
          maximum: 20
          description: Number of alternative tokens to score per position (0–20).
        parallel_tool_calls:
          type: boolean
          description: Set false to force serial tool execution. Default true upstream.
        user:
          type: string
          description: End-user identifier (overridden by `X-Modelux-User-Id` if present).
      additionalProperties: true
      description: |
        Standard OpenAI chat-completion body. Fields beyond those listed
        pass through unchanged for upstreams that recognize them.

    # Intentionally open schema: the proxy mirrors the upstream response shape
    # verbatim (see the info.description note at the top of this spec), so no
    # field catalog is duplicated here.
    ChatCompletionResponse:
      type: object
      additionalProperties: true
      description: Standard OpenAI chat completion response shape.

    AnthropicMessagesRequest:
      type: object
      required:
        - model
        - max_tokens
        - messages
      properties:
        model:
          type: string
        max_tokens:
          type: integer
        messages:
          type: array
          items:
            type: object
            additionalProperties: true
        system:
          description: Plain string OR array of typed blocks (required for `cache_control` on system).
          oneOf:
            - type: string
            - type: array
              items:
                type: object
                additionalProperties: true
        tools:
          type: array
          items:
            type: object
            additionalProperties: true
            properties:
              cache_control:
                type: object
                additionalProperties: true
                description: Anthropic prompt-cache marker (typically `{"type":"ephemeral"}`).
        tool_choice:
          type: object
          additionalProperties: true
        thinking:
          type: object
          additionalProperties: true
          description: Extended-thinking config for Claude models.
        stream:
          type: boolean
        temperature:
          type: number
      additionalProperties: true

    # Intentionally open schema: the proxy mirrors the upstream Anthropic
    # Messages response shape verbatim (see the info.description note at the
    # top of this spec).
    AnthropicMessageResponse:
      type: object
      additionalProperties: true

    # Returned instead of an LLM response when `X-Modelux-Dry-Run` is set:
    # the routing decision (policy, candidate targets, evaluation trace).
    DryRunResponse:
      type: object
      properties:
        dry_run: { type: boolean, example: true }
        routing_policy: { type: string }
        targets:
          type: array
          items:
            type: object
            properties:
              model: { type: string }
              provider: { type: string }
              credential_id: { type: string }
              reason: { type: string }
        trace:
          type: array
          items:
            type: object
            additionalProperties: true
        matched_rule:
          # `nullable: true` was removed in OpenAPI 3.1 (JSON Schema 2020-12);
          # nullability is expressed with a type union instead.
          type: [string, "null"]
      additionalProperties: true

    # OpenAI-shape error envelope, shared by the `/openai/v1/*` surface.
    OpenAIError:
      type: object
      properties:
        error:
          type: object
          properties:
            type: { type: string }
            message: { type: string }
            # `nullable: true` is not valid in OpenAPI 3.1 (JSON Schema
            # 2020-12); "string or null" is expressed as a type union.
            code:
              type: [string, "null"]
            param:
              type: [string, "null"]

    # Anthropic-shape error envelope, shared by the `/anthropic/v1/*` surface.
    AnthropicError:
      type: object
      properties:
        type:
          type: string
          example: "error"
        error:
          type: object
          properties:
            type:
              type: string
            message:
              type: string
