Examples
All Python examples are runnable scripts in the examples/ folder.
Tested with
- cursorpipe 0.1.0
- Cursor Agent CLI v2026.03.25-933d5a6
- Python 3.14
HTTP / curl examples
These examples assume cursorpipe-server is running locally (start it with cursorpipe-server or docker compose up).
Non-streaming completion
# Request one complete (non-streamed) chat completion; -d makes curl send the JSON body as a POST.
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "claude-4.5-sonnet-thinking",
"messages": [{"role": "user", "content": "Explain what an API is in two sentences."}]
}'
Streaming completion
# Same endpoint, with "stream": true in the JSON body to ask for a streamed completion.
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "claude-4.5-sonnet-thinking",
"messages": [{"role": "user", "content": "Write a haiku about Python."}],
"stream": true
}'
List models
curl http://localhost:8080/v1/models
With bearer token auth
# Sends an Authorization: Bearer header with the request.
# NOTE(review): my-secret-token is presumably a placeholder for the token configured on your server — confirm.
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer my-secret-token" \
-d '{"model":"claude-4.5-sonnet-thinking","messages":[{"role":"user","content":"Hello!"}]}'
OpenAI SDK (against cursorpipe-server)
Use the standard OpenAI Python SDK pointed at cursorpipe-server — zero code changes compared to calling OpenAI directly:
from openai import OpenAI

# Point the SDK at the local cursorpipe-server instead of the default OpenAI
# endpoint. The SDK requires some api_key value; this example passes a
# placeholder string.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="unused")

# Non-streaming: the full answer arrives in a single response object.
response = client.chat.completions.create(
    model="claude-4.5-sonnet-thinking",
    messages=[{"role": "user", "content": "Explain what an API is in two sentences."}],
)
print(response.choices[0].message.content)

# Streaming: print each piece of text as soon as it arrives.
stream = client.chat.completions.create(
    model="claude-4.5-sonnet-thinking",
    messages=[{"role": "user", "content": "Write a haiku about Python."}],
    stream=True,
)
for chunk in stream:
    # A streamed chunk may carry an empty `choices` list (for example a
    # usage-only chunk); indexing [0] on it would raise IndexError.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
print()
Python library examples
Basic completion
The simplest way to get a response — one prompt, one answer:
import asyncio

from cursorpipe import CursorClient


async def main():
    """Send one prompt and print the reply."""
    client = CursorClient()
    try:
        response = await client.generate(
            model="claude-4.5-sonnet-thinking",
            prompt="Explain what an API is in two sentences, as if to a 10-year-old.",
        )
        print(response)
    finally:
        # Close the client even if generate() raises.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
Warmup (recommended for production)
Pre-start the ACP process and pre-create sessions to eliminate the ~14s cold-start on the first request:
import asyncio
import time

from cursorpipe import CursorClient


async def main():
    """Warm the client up, then time the first request that follows."""
    client = CursorClient()
    try:
        # Pre-warm at startup
        t0 = time.monotonic()
        await client.warmup(pool_size=3)
        print(f"Warmup: {time.monotonic() - t0:.1f}s")

        # First request — same speed as subsequent ones
        t1 = time.monotonic()
        response = await client.generate(
            model="claude-4.5-sonnet-thinking",
            prompt="Reply with: WARMUP_OK",
        )
        print(f"Response in {time.monotonic() - t1:.1f}s: {response.strip()}")
    finally:
        # Close the client even if warmup() or generate() raises.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
Streaming
See the response appear word-by-word instead of waiting for the full answer:
import asyncio

from cursorpipe import CursorClient


async def main():
    """Stream one response and print each chunk as it arrives."""
    client = CursorClient()
    try:
        async for chunk in client.stream(
            model="claude-4.5-sonnet-thinking",
            prompt="Write a short poem about coding at midnight.",
        ):
            # end="" keeps the chunks on one line; flush shows them immediately.
            print(chunk, end="", flush=True)
        print()
    finally:
        # Close the client even if the stream fails part-way through.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
Multi-turn sessions
Sessions keep conversation history on the server — the LLM remembers everything from previous turns:
import asyncio

from cursorpipe import CursorClient


async def main():
    """Run a three-turn conversation inside a single session."""
    client = CursorClient()
    try:
        async with client.session("claude-4.5-sonnet-thinking") as session:
            r1 = await session.prompt("What is 42 * 3?")
            print(f"AI: {r1.text}")

            # The follow-up prompts refer back to the earlier turns.
            r2 = await session.prompt("Now double that result.")
            print(f"AI: {r2.text}")

            r3 = await session.prompt("What was the original multiplication I asked about?")
            print(f"AI: {r3.text}")

            print(f"\nTotal turns: {session.turn_count}")
    finally:
        # Close the client even if a prompt raises.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
Framework integration (Chainlit / FastAPI)
For frameworks where a session is created, used, and destroyed in different callback functions, use create_session() and manage the session's lifecycle explicitly:
import asyncio

from cursorpipe import CursorClient

# One client shared by every callback below.
client = CursorClient()


async def app_startup():
    """Warm the shared client up once, before any chat starts."""
    await client.warmup(pool_size=5)


async def on_chat_start(user_id):
    """Create a session for a new chat and return it to the caller."""
    session = await client.create_session("claude-4.5-sonnet-thinking")
    return session  # store in user session


async def on_message(session, message):
    """Send `message` on `session` and print the streamed reply."""
    async for chunk in session.stream_prompt(message):
        print(chunk, end="", flush=True)
    print()


async def on_chat_end(session):
    """Discard the session when the chat is over."""
    session.discard()


async def main():
    """Drive the callbacks in the order a framework would call them."""
    try:
        await app_startup()
        session = await on_chat_start("alice")
        try:
            await on_message(session, "What is 42 * 3?")
            await on_message(session, "Now double that.")
        finally:
            # Discard the session even if a message fails.
            await on_chat_end(session)
    finally:
        # Close the shared client on every exit path.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
API key authentication
Use an API key instead of interactive agent login — ideal for scripts, CI, and servers:
import asyncio

from cursorpipe import CursorClient, CursorPipeConfig


async def main():
    """Make one request and report whether the expected reply came back."""
    # Option 1: auto-load from CURSORPIPE_API_KEY env var or .env file
    client = CursorClient()

    # Option 2: pass explicitly
    # config = CursorPipeConfig(api_key="crsr_your_key_here")
    # client = CursorClient(config)

    try:
        response = await client.generate(
            model="claude-4.5-sonnet-thinking",
            prompt="Reply with exactly: AUTH_OK",
            system="Reply with exactly what is asked, nothing else.",
        )
        print("Auth working!" if "AUTH_OK" in response else f"Unexpected: {response}")
    finally:
        # Close the client even if the request fails (e.g. on a bad key).
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
Get your API key at cursor.com/dashboard/cloud-agents.
Model switching
Route different tasks to different models in a single client:
import asyncio

from cursorpipe import CursorClient


async def main():
    """Send two tasks to two different models through one client."""
    client = CursorClient()
    try:
        # Classification step, sent to the first model.
        intent = await client.generate(
            model="gpt-5.4-mini-medium",
            prompt="Classify this query: 'show top 10 users by revenue'",
            system="Reply with exactly one of: SQL_QUERY, SCHEMA_QUESTION, GREETING",
        )
        print(f"Intent: {intent.strip()}")

        # SQL generation step, sent to the second model.
        sql = await client.generate(
            model="claude-4.5-sonnet-thinking",
            prompt="Generate a PostgreSQL query for: top 10 users by revenue in 2026",
            system="You are a PostgreSQL expert. Reply with only the SQL query.",
        )
        print(f"SQL:\n{sql.strip()}")
    finally:
        # Close the client even if either request raises.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
Session streaming
Stream responses chunk-by-chunk within a multi-turn session:
import asyncio

from cursorpipe import CursorClient


async def main():
    """Stream the first turn of a session, then send a regular follow-up."""
    client = CursorClient()
    try:
        async with client.session("claude-4.5-sonnet-thinking") as session:
            print("AI: ", end="")
            async for chunk in session.stream_prompt(
                "Write a haiku about async Python programming."
            ):
                print(chunk, end="", flush=True)
            print()

            # Second turn in the same session, this time without streaming.
            r2 = await session.prompt("Now explain the haiku you just wrote.")
            print(f"AI: {r2.text}")

            print(f"Total turns: {session.turn_count}")
    finally:
        # Close the client even if a turn raises.
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())