OpenAI Responses API
The OpenAI Responses API is a modern alternative to the Chat Completions API. Point your OpenAI SDK to AI Gateway's base URL and use provider/model identifiers to route requests to OpenAI, Anthropic, Google, and more.
Set your SDK's base URL to AI Gateway and use your API key for authentication:
import OpenAI from 'openai';
const client = new OpenAI({
apiKey: process.env.AI_GATEWAY_API_KEY,
baseURL: 'https://ai-gateway.vercel.sh/v1',
});
const response = await client.responses.create({
model: 'anthropic/claude-sonnet-4.6',
input: 'What is the capital of France?',
});
console.log(response.output_text);import os
from openai import OpenAI
client = OpenAI(
api_key=os.getenv('AI_GATEWAY_API_KEY'),
base_url='https://ai-gateway.vercel.sh/v1',
)
response = client.responses.create(
model='anthropic/claude-sonnet-4.6',
input='What is the capital of France?',
)
print(response.output_text)Set stream: true to receive tokens as they're generated. The SDK returns an async iterator of server-sent events:
import OpenAI from 'openai';
const client = new OpenAI({
apiKey: process.env.AI_GATEWAY_API_KEY,
baseURL: 'https://ai-gateway.vercel.sh/v1',
});
const stream = await client.responses.create({
model: 'openai/gpt-5.4',
input: 'Write a haiku about programming.',
stream: true,
});
for await (const event of stream) {
if (event.type === 'response.output_text.delta') {
process.stdout.write(event.delta);
}
}import os
from openai import OpenAI
client = OpenAI(
api_key=os.getenv('AI_GATEWAY_API_KEY'),
base_url='https://ai-gateway.vercel.sh/v1',
)
stream = client.responses.create(
model='openai/gpt-5.4',
input='Write a haiku about programming.',
stream=True,
)
for event in stream:
if event.type == 'response.output_text.delta':
print(event.delta, end='', flush=True)Define tools with JSON Schema parameters. The model can call them, and you can feed the results back in a follow-up request:
import OpenAI from 'openai';
const client = new OpenAI({
apiKey: process.env.AI_GATEWAY_API_KEY,
baseURL: 'https://ai-gateway.vercel.sh/v1',
});
const response = await client.responses.create({
model: 'openai/gpt-5.4',
input: 'What is the weather in San Francisco?',
tools: [
{
type: 'function',
name: 'get_weather',
description: 'Get the current weather for a location',
strict: true,
parameters: {
type: 'object',
properties: {
location: { type: 'string' },
},
required: ['location'],
additionalProperties: false,
},
},
],
});
// The model returns function_call items in the output
for (const item of response.output) {
if (item.type === 'function_call') {
console.log(`Call: ${item.name}(${item.arguments})`);
}
}import os
import json
from openai import OpenAI
client = OpenAI(
api_key=os.getenv('AI_GATEWAY_API_KEY'),
base_url='https://ai-gateway.vercel.sh/v1',
)
response = client.responses.create(
model='openai/gpt-5.4',
input='What is the weather in San Francisco?',
tools=[
{
'type': 'function',
'name': 'get_weather',
'description': 'Get the current weather for a location',
'strict': True,
'parameters': {
'type': 'object',
'properties': {
'location': {'type': 'string'},
},
'required': ['location'],
'additionalProperties': False,
},
},
],
)
for item in response.output:
if item.type == 'function_call':
print(f'Call: {item.name}({item.arguments})')To continue the conversation with tool results, include the function call and its output in the next request's input array:
const functionCall = response.output.find(
(item) => item.type === 'function_call',
);
const followup = await client.responses.create({
model: 'openai/gpt-5.4',
input: [
{ role: 'user', content: 'What is the weather in San Francisco?' },
{
type: 'function_call',
id: functionCall.id,
call_id: functionCall.call_id,
name: functionCall.name,
arguments: functionCall.arguments,
},
{
type: 'function_call_output',
call_id: functionCall.call_id,
output: JSON.stringify({ temperature: 68, condition: 'Sunny' }),
},
],
tools: [
/* same tools as above */
],
});
console.log(followup.output_text);import json
function_call = next(
item for item in response.output if item.type == 'function_call'
)
followup = client.responses.create(
model='openai/gpt-5.4',
input=[
{'role': 'user', 'content': 'What is the weather in San Francisco?'},
{
'type': 'function_call',
'id': function_call.id,
'call_id': function_call.call_id,
'name': function_call.name,
'arguments': function_call.arguments,
},
{
'type': 'function_call_output',
'call_id': function_call.call_id,
'output': json.dumps({'temperature': 68, 'condition': 'Sunny'}),
},
],
tools=[
# same tools as above
],
)
print(followup.output_text)Use text.format to constrain the model's output to a JSON schema:
import OpenAI from 'openai';
const client = new OpenAI({
apiKey: process.env.AI_GATEWAY_API_KEY,
baseURL: 'https://ai-gateway.vercel.sh/v1',
});
const response = await client.responses.create({
model: 'openai/gpt-5.4',
input: 'List 3 colors with their hex codes.',
text: {
format: {
type: 'json_schema',
name: 'colors',
strict: true,
schema: {
type: 'object',
properties: {
colors: {
type: 'array',
items: {
type: 'object',
properties: {
name: { type: 'string' },
hex: { type: 'string' },
},
required: ['name', 'hex'],
additionalProperties: false,
},
},
},
required: ['colors'],
additionalProperties: false,
},
},
},
});
const data = JSON.parse(response.output_text);
console.log(data.colors);import os
import json
from openai import OpenAI
client = OpenAI(
api_key=os.getenv('AI_GATEWAY_API_KEY'),
base_url='https://ai-gateway.vercel.sh/v1',
)
response = client.responses.create(
model='openai/gpt-5.4',
input='List 3 colors with their hex codes.',
text={
'format': {
'type': 'json_schema',
'name': 'colors',
'strict': True,
'schema': {
'type': 'object',
'properties': {
'colors': {
'type': 'array',
'items': {
'type': 'object',
'properties': {
'name': {'type': 'string'},
'hex': {'type': 'string'},
},
'required': ['name', 'hex'],
'additionalProperties': False,
},
},
},
'required': ['colors'],
'additionalProperties': False,
},
},
},
)
data = json.loads(response.output_text)
print(data['colors'])For models that support reasoning, set the reasoning parameter to control how much effort the model spends thinking:
import OpenAI from 'openai';
const client = new OpenAI({
apiKey: process.env.AI_GATEWAY_API_KEY,
baseURL: 'https://ai-gateway.vercel.sh/v1',
});
const response = await client.responses.create({
model: 'anthropic/claude-sonnet-4.6',
input: 'Explain the Monty Hall problem step by step.',
reasoning: {
effort: 'high',
},
max_output_tokens: 2048,
});
console.log(response.output_text);import os
from openai import OpenAI
client = OpenAI(
api_key=os.getenv('AI_GATEWAY_API_KEY'),
base_url='https://ai-gateway.vercel.sh/v1',
)
response = client.responses.create(
model='anthropic/claude-sonnet-4.6',
input='Explain the Monty Hall problem step by step.',
reasoning={
'effort': 'high',
},
max_output_tokens=2048,
)
print(response.output_text)The effort parameter accepts none, minimal, low, medium, high, or xhigh. AI Gateway maps this to provider-specific reasoning settings.
| Parameter | Type | Description |
|---|---|---|
model | string | Model ID in provider/model format (e.g., openai/gpt-5.4, anthropic/claude-sonnet-4.6) |
input | string or array | A text string or array of input items (messages, function calls, function call outputs) |
| Parameter | Type | Description |
|---|---|---|
stream | boolean | Stream tokens via server-sent events. Defaults to false |
max_output_tokens | integer | Maximum number of tokens to generate |
temperature | number | Controls randomness (0-2). Lower values are more deterministic |
top_p | number | Nucleus sampling (0-1) |
presence_penalty | number | Penalizes tokens that already appear in the text so far |
frequency_penalty | number | Penalizes tokens based on their frequency in the text so far |
instructions | string | System-level instructions for the model |
tools | array | Tool definitions for function calling |
tool_choice | string or object | Tool selection: auto, required, none, or a specific function |
parallel_tool_calls | boolean | Allows the model to call multiple tools in a single turn |
allowed_tools | array | Subset of tool names the model can use for this request |
reasoning | object | Reasoning config with effort (none, minimal, low, medium, high, xhigh). OpenAI models also support summary (detailed, auto, concise) to receive a text summary of the model's reasoning |
text | object | Output format config, including json_schema and json_object for structured output |
truncation | string | Truncation strategy for long inputs: auto or disabled |
previous_response_id | string | ID of a previous response for multi-turn conversations |
store | boolean | Stores the response for later retrieval |
metadata | object | Up to 16 key-value pairs for tracking (keys max 64 chars, values max 512 chars) |
caching | string | Enables prompt caching. Only auto is supported |
prompt_cache_key | string | Key to identify cached prompts (max 64 characters) |
The API returns standard HTTP status codes and error responses.
400 Bad Request- Invalid request parameters401 Unauthorized- Invalid or missing authentication403 Forbidden- Insufficient permissions404 Not Found- Model or endpoint not found429 Too Many Requests- Rate limit exceeded500 Internal Server Error- Server error
When an error occurs, the API returns a JSON object with details about what went wrong.
{
"error": {
"type": "invalid_request_error",
"message": "At least one user message is required in the input"
}
}Was this helpful?