Chat & Completion Models

Learn about the two main types of language models available on AnyAPI: chat models designed for conversations and completion models for text generation tasks.

Model Types Overview

Chat Models

Chat models are designed for interactive conversations with a structured message format including system, user, and assistant roles.

Completion Models

Completion models are designed to continue or complete text based on a given prompt, operating on raw text input.

Key Differences

Chat Models

  • Structured conversation format
  • Role-based messages (system/user/assistant)
  • Built-in conversation context
  • Better for dialogue and Q&A

Completion Models

  • Raw text continuation
  • Single prompt input
  • Manual context management
  • Better for text generation tasks

Chat Models API

Chat models use the /v1/chat/completions endpoint with a messages array:
# Request one chat completion; `messages` is the ordered turn list and the
# system turn sets the assistant's behavior.
curl -X POST "https://api.anyapi.ai/v1/chat/completions" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o",
    "messages": [
      {
        "role": "system",
        "content": "You are a helpful assistant."
      },
      {
        "role": "user", 
        "content": "Explain quantum computing in simple terms."
      }
    ],
    "max_tokens": 150,
    "temperature": 0.7
  }'

Message Roles

System Messages

Set the behavior and context for the AI assistant:
{
  "role": "system",
  "content": "You are a professional financial advisor. Provide conservative investment advice and always mention risks."
}

User Messages

Messages from the human user:
{
  "role": "user", 
  "content": "Should I invest in cryptocurrency?"
}

Assistant Messages

Previous responses from the AI (for conversation context):
{
  "role": "assistant",
  "content": "Cryptocurrency investments carry significant risks due to high volatility..."
}

Conversation Context Management

Building Conversation History

class ConversationManager:
    """Maintains a running chat history and exchanges messages with the API.

    The full history (system prompt plus every prior turn) is resent on each
    request so the model keeps conversational context.
    """

    def __init__(self, system_prompt="You are a helpful assistant."):
        # Seed the history with the system instruction.
        self.messages = [{"role": "system", "content": system_prompt}]

    def add_user_message(self, content):
        """Append a user turn to the history."""
        self.messages.append({"role": "user", "content": content})

    def add_assistant_message(self, content):
        """Append an assistant turn to the history."""
        self.messages.append({"role": "assistant", "content": content})

    def get_response(self, user_input, model="gpt-4o"):
        """Record *user_input*, query the model, record and return its reply."""
        self.add_user_message(user_input)

        request_headers = {
            "Authorization": "Bearer YOUR_API_KEY",
            "Content-Type": "application/json",
        }
        request_body = {
            "model": model,
            "messages": self.messages,
            "max_tokens": 500,
            "temperature": 0.7,
        }
        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers=request_headers,
            json=request_body,
        )

        reply = response.json()["choices"][0]["message"]["content"]
        # Store the reply so the next turn has full context.
        self.add_assistant_message(reply)
        return reply

    def clear_conversation(self, keep_system=True):
        """Reset the history, optionally preserving the system prompt."""
        if keep_system and self.messages[0]["role"] == "system":
            self.messages = self.messages[:1]
        else:
            self.messages = []

# Usage
# One ConversationManager instance accumulates the whole exchange, so each
# get_response() call resends prior turns and the model answers in context.
conversation = ConversationManager("You are a coding tutor. Help users learn programming.")

response1 = conversation.get_response("What is a variable in Python?")
print("AI:", response1)

response2 = conversation.get_response("Can you give me an example?")
print("AI:", response2)

# Follow-ups like this one only work because the history is resent each call.
response3 = conversation.get_response("How do I change a variable's value?")
print("AI:", response3)

Managing Context Length

def truncate_conversation(messages, max_tokens=4000):
    """Keep conversation within token limits"""
    # Always keep system message if present
    system_message = None
    conversation_messages = messages
    
    if messages and messages[0]["role"] == "system":
        system_message = messages[0]
        conversation_messages = messages[1:]
    
    # Estimate tokens (rough approximation: 1 token ≈ 4 characters)
    def estimate_tokens(text):
        return len(text) / 4
    
    total_tokens = 0
    kept_messages = []
    
    # Keep most recent messages that fit within limit
    for message in reversed(conversation_messages):
        message_tokens = estimate_tokens(message["content"])
        if total_tokens + message_tokens > max_tokens:
            break
        
        kept_messages.insert(0, message)
        total_tokens += message_tokens
    
    # Rebuild messages with system prompt
    result = []
    if system_message:
        result.append(system_message)
    result.extend(kept_messages)
    
    return result

# Usage
conversation = ConversationManager()

# After many exchanges, truncate if needed. 20 is an arbitrary message-count
# trigger; the actual budget is enforced in tokens by truncate_conversation().
if len(conversation.messages) > 20:  # Arbitrary limit
    conversation.messages = truncate_conversation(conversation.messages)

Advanced Chat Patterns

Multi-turn Reasoning

def multi_turn_reasoning(question, model="gpt-4o"):
    """Ask the model to decompose a complex question and solve it in steps."""
    system_text = """You are a logical reasoning assistant. When given a complex question:
            1. Break it down into steps
            2. Work through each step methodically  
            3. Provide a clear final answer
            
            Think step by step and show your reasoning."""

    request_body = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_text},
            {"role": "user", "content": f"Please solve this step by step: {question}"},
        ],
        "temperature": 0.3,  # Lower temperature for more focused reasoning
    }

    response = requests.post(
        "https://api.anyapi.ai/v1/chat/completions",
        headers={
            "Authorization": "Bearer YOUR_API_KEY",
            "Content-Type": "application/json",
        },
        json=request_body,
    )

    return response.json()["choices"][0]["message"]["content"]

# Usage
# NOTE: the reply text varies between runs; temperature 0.3 reduces but does
# not eliminate variation.
answer = multi_turn_reasoning(
    "A train travels 120 miles in 2 hours, then 180 miles in 3 hours. What's the average speed for the entire journey?"
)
print(answer)

Role-based Conversations

class RoleBasedChat:
    """Registry of persona-specific chat sessions, one history per expert."""

    def __init__(self):
        # Maps expert_id -> message history (list of role/content dicts).
        self.conversations = {}

    def create_expert(self, expert_type, expertise_description):
        """Create a specialized expert chatbot"""
        system_prompt = f"""You are a {expert_type} expert. {expertise_description}
        
        Provide accurate, detailed information within your area of expertise.
        If asked about topics outside your expertise, politely redirect to your specialization.
        Always be helpful and educational."""

        # Derive a stable key from the expert type, e.g. "Medical Doctor"
        # becomes "medical_doctor".
        expert_id = expert_type.lower().replace(" ", "_")
        self.conversations[expert_id] = [{"role": "system", "content": system_prompt}]
        return expert_id

    def chat_with_expert(self, expert_id, question, model="gpt-4o"):
        """Chat with a specific expert"""
        history = self.conversations.get(expert_id)
        if history is None:
            raise ValueError(f"Expert {expert_id} not found")

        history.append({"role": "user", "content": question})

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": "Bearer YOUR_API_KEY",
                "Content-Type": "application/json",
            },
            json={"model": model, "messages": history, "temperature": 0.7},
        )

        answer = response.json()["choices"][0]["message"]["content"]
        # Persist the reply so this expert keeps its own running context.
        history.append({"role": "assistant", "content": answer})
        return answer

# Usage
chat_system = RoleBasedChat()

# Create different experts; each gets an isolated conversation history.
doctor_id = chat_system.create_expert(
    "Medical Doctor",
    "You have extensive knowledge of medicine, anatomy, and health. Provide medical information for educational purposes but always recommend consulting healthcare professionals for specific medical advice."
)

lawyer_id = chat_system.create_expert(
    "Legal Advisor", 
    "You are knowledgeable about law, legal procedures, and regulations. Provide legal information for educational purposes but always recommend consulting qualified attorneys for specific legal advice."
)

# Chat with experts — follow-up questions to the same expert_id would stay
# within that expert's own context.
medical_advice = chat_system.chat_with_expert(
    doctor_id, 
    "What are the symptoms of dehydration?"
)

legal_advice = chat_system.chat_with_expert(
    lawyer_id,
    "What should I know about tenant rights?"
)

print("Medical Expert:", medical_advice)
print("Legal Expert:", legal_advice)

Best Practices

1. System Prompt Design

# Good system prompt - specific and actionable: it pins a persona, concrete
# behavior rules, and escalation contacts so responses stay on-policy.
good_system_prompt = """You are a customer service representative for TechCorp.

Guidelines:
- Be polite and professional
- Provide specific solutions when possible
- If you cannot help, direct to appropriate resources
- Ask clarifying questions when needed
- Keep responses concise but helpful

Available resources:
- Technical support: tech@techcorp.com
- Billing questions: billing@techcorp.com  
- Returns: returns@techcorp.com"""

# Poor system prompt - too vague: gives the model no persona, no constraints,
# and no guidance on tone or escalation.
poor_system_prompt = "You are helpful."

2. Temperature Settings

# Sampling presets tuned per task type: higher temperature/top_p means more
# diverse output, lower means more focused and repeatable.
creative_writing = {"temperature": 0.9, "top_p": 0.9}   # High creativity

factual_qa = {"temperature": 0.3, "top_p": 0.8}         # Low creativity, more focused

code_generation = {"temperature": 0.1, "top_p": 0.95}   # Very focused and deterministic

def get_response(messages, use_case="general"):
    """Send *messages* using sampling parameters chosen by *use_case*.

    Unknown use cases fall back to balanced defaults (0.7 / 0.9).
    """
    presets = {
        "creative": creative_writing,
        "factual": factual_qa,
        "code": code_generation,
    }
    sampling = presets.get(use_case, {"temperature": 0.7, "top_p": 0.9})

    # Merge the sampling preset into the request body.
    request_body = {"model": "gpt-4o", "messages": messages}
    request_body.update(sampling)

    response = requests.post(
        "https://api.anyapi.ai/v1/chat/completions",
        headers={
            "Authorization": "Bearer YOUR_API_KEY",
            "Content-Type": "application/json",
        },
        json=request_body,
    )

    return response.json()["choices"][0]["message"]["content"]

3. Error Handling

def robust_chat_completion(messages, model="gpt-4o", max_retries=3):
    """Chat completion with error handling, retries, and exponential backoff.

    Args:
        messages: Chat history to send.
        model: Model identifier.
        max_retries: Total number of attempts before giving up.

    Returns:
        The assistant's reply text.

    Raises:
        RuntimeError: If every attempt fails. (RuntimeError subclasses
        Exception, so callers catching Exception are unaffected.)
    """
    import time  # local import keeps this snippet self-contained

    for attempt in range(max_retries):
        try:
            response = requests.post(
                "https://api.anyapi.ai/v1/chat/completions",
                headers={
                    "Authorization": "Bearer YOUR_API_KEY",
                    "Content-Type": "application/json"
                },
                json={
                    "model": model,
                    "messages": messages,
                    "temperature": 0.7,
                    "max_tokens": 500
                },
                timeout=30  # 30 second timeout
            )

            response.raise_for_status()  # Raise exception for HTTP errors

            data = response.json()

            if "choices" in data and len(data["choices"]) > 0:
                return data["choices"][0]["message"]["content"]
            # Malformed 200 response: treated as retryable below.
            raise ValueError("No response choices returned")

        except requests.exceptions.Timeout:
            print(f"Attempt {attempt + 1}: Request timed out")
        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1}: Request failed: {e}")
        except (KeyError, ValueError) as e:
            print(f"Attempt {attempt + 1}: Invalid response format: {e}")

        if attempt < max_retries - 1:
            time.sleep(2 ** attempt)  # Exponential backoff

    raise RuntimeError("Failed to get response after all retry attempts")

Performance Optimization

Batch Processing

def batch_chat_completions(message_batches, model="gpt-4o"):
    """Process multiple conversations in parallel.

    Args:
        message_batches: Mapping of batch_id -> messages list.
        model: Model used for every batch.

    Returns:
        Mapping of batch_id -> {"success": True, "response": str} on
        success, or {"success": False, "error": str} on failure.
    """
    # The previous version also imported `threading`, which was never used.
    import concurrent.futures

    def process_single_chat(messages):
        # Runs in a worker thread; returns the reply text or raises.
        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": "Bearer YOUR_API_KEY",
                "Content-Type": "application/json"
            },
            json={
                "model": model,
                "messages": messages,
                "temperature": 0.7
            }
        )
        return response.json()["choices"][0]["message"]["content"]

    results = {}

    # Collect results on the main thread via futures instead of having the
    # workers mutate a shared dict.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_id = {
            executor.submit(process_single_chat, messages): batch_id
            for batch_id, messages in message_batches.items()
        }

        for future in concurrent.futures.as_completed(future_to_id):
            batch_id = future_to_id[future]
            try:
                results[batch_id] = {"success": True, "response": future.result()}
            except Exception as e:
                results[batch_id] = {"success": False, "error": str(e)}

    return results

# Usage
# Three independent one-shot prompts, keyed by batch id.
batches = {
    "summary_1": [
        {"role": "user", "content": "Summarize the benefits of renewable energy"}
    ],
    "summary_2": [
        {"role": "user", "content": "Explain machine learning in simple terms"}
    ],
    "summary_3": [
        {"role": "user", "content": "What are the main causes of climate change?"}
    ]
}

results = batch_chat_completions(batches)
# Each entry reports success/failure individually, so one failed batch does
# not hide the others' results.
for batch_id, result in results.items():
    if result["success"]:
        print(f"{batch_id}: {result['response'][:100]}...")
    else:
        print(f"{batch_id}: Error - {result['error']}")

Model Selection Guide

When to Use Chat Models

Best for:
  • Interactive conversations
  • Question-answering systems
  • Customer support chatbots
  • Multi-turn dialogues
  • Role-playing scenarios
  • Structured conversations

When to Use Completion Models

Best for:
  • Text continuation
  • Creative writing
  • Code completion
  • Single-shot text generation
  • Template filling
  • Legacy integrations

Model Recommendations

| Use Case            | Recommended Model         | Reasoning                       |
| ------------------- | ------------------------- | ------------------------------- |
| Customer Support    | Claude 3.5 Sonnet         | Excellent instruction following |
| Creative Writing    | GPT-4o                    | Strong creative capabilities    |
| Code Assistance     | GPT-4o, Claude 3.5 Sonnet | Strong coding abilities         |
| Analysis & Research | Claude 3.5 Sonnet         | Great analytical thinking       |
| Fast Responses      | GPT-4o-mini               | Speed and efficiency            |
| Multilingual        | Gemini 2.5 Pro            | Strong multilingual support     |

Common Patterns

The Few-Shot Pattern

def few_shot_classification(text, examples, model="gpt-4o"):
    """Classify text using few-shot examples"""
    instruction = {
        "role": "system",
        "content": "Classify the sentiment of the given text as positive, negative, or neutral based on the examples."
    }

    # Replay each example as a user/assistant exchange so the model infers
    # the labeling pattern, then append the text to classify.
    demonstrations = [
        turn
        for sample in examples
        for turn in (
            {"role": "user", "content": sample["text"]},
            {"role": "assistant", "content": sample["label"]},
        )
    ]
    messages = [instruction] + demonstrations + [{"role": "user", "content": text}]

    response = requests.post(
        "https://api.anyapi.ai/v1/chat/completions",
        headers={
            "Authorization": "Bearer YOUR_API_KEY",
            "Content-Type": "application/json",
        },
        json={
            "model": model,
            "messages": messages,
            # Low temperature + tiny max_tokens: we only want a short label.
            "temperature": 0.3,
            "max_tokens": 10,
        },
    )

    return response.json()["choices"][0]["message"]["content"].strip()

# Usage
# Three labeled examples establish the expected output format: a single
# lowercase label.
examples = [
    {"text": "I love this product!", "label": "positive"},
    {"text": "This is terrible quality.", "label": "negative"},
    {"text": "It's okay, nothing special.", "label": "neutral"}
]

result = few_shot_classification("The service was amazing!", examples)
print(result)  # Should output: positive

Chain of Thought Reasoning

def chain_of_thought(problem, model="gpt-4o"):
    """Solve a problem with chain-of-thought prompting (explicit steps)."""
    guidance = """Solve problems step by step. For each step:
            1. State what you're doing
            2. Show your work
            3. Explain your reasoning
            4. Move to the next step
            
            End with a clear final answer."""

    request_body = {
        "model": model,
        "messages": [
            {"role": "system", "content": guidance},
            {"role": "user", "content": f"Let's think step by step:\n\n{problem}"},
        ],
        "temperature": 0.3,  # keep the reasoning focused
    }

    response = requests.post(
        "https://api.anyapi.ai/v1/chat/completions",
        headers={
            "Authorization": "Bearer YOUR_API_KEY",
            "Content-Type": "application/json",
        },
        json=request_body,
    )

    return response.json()["choices"][0]["message"]["content"]

# Usage
problem = """
A store sells apples for $1.50 per pound and oranges for $2.00 per pound.
If you buy 3 pounds of apples and 2 pounds of oranges, and you pay with a $20 bill,
how much change will you receive?
"""

# Expected reasoning: 3*$1.50 + 2*$2.00 = $8.50, so change from $20 is $11.50.
solution = chain_of_thought(problem)
print(solution)

Getting Started