Analyze images, extract text, and understand visual content with AI vision models
import requests
import base64
import mimetypes

def analyze_image(image_path, prompt="Describe this image in detail"):
    """Analyze an image with AI vision"""
    # Encode image to base64
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode('utf-8')

    # Guess the MIME type from the file extension, falling back to JPEG
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    response = requests.post(
        "https://api.anyapi.ai/v1/chat/completions",
        headers={
            "Authorization": "Bearer YOUR_API_KEY",
            "Content-Type": "application/json"
        },
        json={
            "model": "gpt-4o",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            "max_tokens": 500
        }
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
# Usage examples
description = analyze_image("photo.jpg", "What do you see in this image?")
print(description)
# OCR example
text_content = analyze_image("document.png", "Extract all text from this image")
print(text_content)
# Analysis example
analysis = analyze_image("chart.png", "Analyze this chart and explain the trends")
print(analysis)
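If an image is already hosted publicly, OpenAI-compatible endpoints generally also accept a plain https URL in the image_url field, which skips the base64 step. A minimal sketch reusing the same endpoint and model shown above; the example URL is a placeholder:

def analyze_image_url(image_url, prompt="Describe this image in detail"):
    """Analyze a remotely hosted image by passing its URL directly."""
    response = requests.post(
        "https://api.anyapi.ai/v1/chat/completions",
        headers={
            "Authorization": "Bearer YOUR_API_KEY",
            "Content-Type": "application/json"
        },
        json={
            "model": "gpt-4o",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": image_url}}
                    ]
                }
            ],
            "max_tokens": 500
        }
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

# Hypothetical hosted image
print(analyze_image_url("https://example.com/photo.jpg", "What do you see?"))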
class DocumentProcessor:
    def __init__(self, api_key):
        self.api_key = api_key

    def extract_text_with_structure(self, image_path):
        """Extract text while preserving document structure"""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """
                                Extract all text from this document and structure it properly.
                                Format the output as JSON with these fields:
                                - title: Document title if present
                                - sections: Array of sections with headers and content
                                - tables: Any tables found with structured data
                                - metadata: Any dates, reference numbers, etc.
                                - raw_text: All text in reading order
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ]
            }
        )
        return response.json()["choices"][0]["message"]["content"]
    def analyze_invoice(self, image_path):
        """Extract invoice information"""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """
                                Analyze this invoice and extract key information as JSON:
                                {
                                    "invoice_number": "",
                                    "date": "",
                                    "due_date": "",
                                    "vendor": {
                                        "name": "",
                                        "address": "",
                                        "phone": "",
                                        "email": ""
                                    },
                                    "bill_to": {
                                        "name": "",
                                        "address": ""
                                    },
                                    "items": [
                                        {
                                            "description": "",
                                            "quantity": 0,
                                            "unit_price": 0,
                                            "total": 0
                                        }
                                    ],
                                    "subtotal": 0,
                                    "tax": 0,
                                    "total": 0
                                }
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ]
            }
        )
        return response.json()["choices"][0]["message"]["content"]
    def compare_documents(self, image1_path, image2_path):
        """Compare two documents and find differences"""
        # Encode both images
        with open(image1_path, "rb") as f1, open(image2_path, "rb") as f2:
            base64_image1 = base64.b64encode(f1.read()).decode('utf-8')
            base64_image2 = base64.b64encode(f2.read()).decode('utf-8')

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """
                                Compare these two documents and identify:
                                1. Key differences in content
                                2. Changes in formatting or structure
                                3. Added or removed sections
                                4. Any data discrepancies
                                Provide a detailed comparison report.
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image1}"
                                }
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image2}"
                                }
                            }
                        ]
                    }
                ]
            }
        )
        return response.json()["choices"][0]["message"]["content"]
# Usage
processor = DocumentProcessor("YOUR_API_KEY")

# Extract structured text (pass an image of the page, e.g. a scan or screenshot, not a PDF file)
structured_data = processor.extract_text_with_structure("contract.jpg")
print(structured_data)

# Process invoice
invoice_data = processor.analyze_invoice("invoice.jpg")
print(invoice_data)

# Compare documents (again as page images)
comparison = processor.compare_documents("version1.jpg", "version2.jpg")
print(comparison)
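These methods return the model's reply as a plain string, so even when the prompt asks for JSON you still need to parse it yourself, and models sometimes wrap the JSON in a markdown code fence. A small helper sketch; the fence-stripping behavior is an assumption about model output, not something the API guarantees:

import json
import re

def parse_json_reply(reply):
    """Parse a JSON object out of a model reply, tolerating ```json fences."""
    # Strip a leading/trailing markdown fence if the model added one
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", reply.strip())
    return json.loads(cleaned)

invoice = parse_json_reply(invoice_data)
print(invoice.get("invoice_number"), invoice.get("total"))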
class EcommerceImageAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key

    def analyze_product_image(self, image_path):
        """Analyze product image for e-commerce listing"""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """
                                Analyze this product image for e-commerce use. Provide:
                                1. Product identification and category
                                2. Key features and attributes visible
                                3. Color, material, size estimates
                                4. Condition assessment
                                5. Suggested product title and description
                                6. Keywords for SEO
                                7. Quality assessment of the photo
                                Format as JSON with these fields.
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ]
            }
        )
        return response.json()["choices"][0]["message"]["content"]
    def check_image_quality(self, image_path):
        """Assess image quality for e-commerce standards"""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """
                                Assess this product image quality for e-commerce use:
                                1. Technical quality (resolution, focus, lighting)
                                2. Composition and framing
                                3. Background and staging
                                4. Product visibility and clarity
                                5. Overall professional appearance
                                6. Recommendations for improvement
                                7. Quality score (1-10)
                                Provide detailed feedback and suggestions.
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ]
            }
        )
        return response.json()["choices"][0]["message"]["content"]
    def generate_alt_text(self, image_path):
        """Generate accessibility alt text for images"""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """
                                Generate concise, descriptive alt text for this product image.
                                Focus on essential details that help users understand what the product is.
                                Keep it under 125 characters and be specific about key features.
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ]
            }
        )
        return response.json()["choices"][0]["message"]["content"]
# Usage
analyzer = EcommerceImageAnalyzer("YOUR_API_KEY")
# Analyze product
product_analysis = analyzer.analyze_product_image("product.jpg")
print(product_analysis)
# Check quality
quality_report = analyzer.check_image_quality("product.jpg")
print(quality_report)
# Generate alt text
alt_text = analyzer.generate_alt_text("product.jpg")
print(alt_text)
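For a catalog with many photos, the same calls can be looped over a directory. A sketch that iterates files sequentially; the folder name and accepted extensions are placeholders you would adapt:

import os

def analyze_product_folder(analyzer, folder="product_photos"):
    """Run listing analysis and alt-text generation for every image in a folder."""
    results = {}
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        path = os.path.join(folder, filename)
        results[filename] = {
            "analysis": analyzer.analyze_product_image(path),
            "alt_text": analyzer.generate_alt_text(path),
        }
    return results

catalog = analyze_product_folder(analyzer)
for name, data in catalog.items():
    print(name, "->", data["alt_text"])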
class ImageModerator:
    def __init__(self, api_key):
        self.api_key = api_key

    def moderate_image_content(self, image_path):
        """Check image for inappropriate content"""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """
                                Analyze this image for content moderation. Check for:
                                1. Inappropriate or explicit content
                                2. Violence or harmful imagery
                                3. Hate symbols or offensive material
                                4. Privacy concerns (faces, license plates, etc.)
                                5. Copyright issues (branded content, logos)
                                Provide a safety rating:
                                - SAFE: Appropriate for all audiences
                                - CAUTION: May need review
                                - UNSAFE: Violates content policy
                                Include reasoning for your assessment.
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ]
            }
        )
        return response.json()["choices"][0]["message"]["content"]
# Usage
moderator = ImageModerator("YOUR_API_KEY")
moderation_result = moderator.moderate_image_content("user_upload.jpg")
print(moderation_result)
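Because the prompt asks for one of three ratings (SAFE, CAUTION, UNSAFE), the free-text reply can be mapped to an automated decision with a simple keyword check. A rough sketch only, since the model's exact wording isn't guaranteed; anything unrecognized falls back to human review:

def moderation_decision(report):
    """Map the moderator's text report to an action, defaulting to manual review."""
    upper = report.upper()
    if "UNSAFE" in upper:      # Check UNSAFE first: it contains the substring "SAFE"
        return "reject"
    if "CAUTION" in upper:
        return "manual_review"
    if "SAFE" in upper:
        return "approve"
    return "manual_review"     # Unrecognized reply: fall back to a human

print(moderation_decision(moderation_result))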
class VisionChatbot:
    def __init__(self, api_key):
        self.api_key = api_key
        self.conversation_history = []

    def add_image_to_conversation(self, image_path, user_message):
        """Add an image and message to ongoing conversation"""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        # Add user message with image to conversation
        self.conversation_history.append({
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": user_message
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                }
            ]
        })
        return self.get_response()

    def add_text_message(self, message):
        """Add a text-only message to conversation"""
        self.conversation_history.append({
            "role": "user",
            "content": message
        })
        return self.get_response()

    def get_response(self):
        """Get AI response for current conversation"""
        response = requests.post(
            "https://api.anyapi.ai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": "gpt-4o",
                "messages": self.conversation_history,
                "max_tokens": 500
            }
        )
        assistant_response = response.json()["choices"][0]["message"]["content"]

        # Add assistant response to conversation
        self.conversation_history.append({
            "role": "assistant",
            "content": assistant_response
        })
        return assistant_response

    def clear_conversation(self):
        """Clear conversation history"""
        self.conversation_history = []
# Usage
chatbot = VisionChatbot("YOUR_API_KEY")
# Start conversation with image
response1 = chatbot.add_image_to_conversation(
    "vacation_photo.jpg",
    "What do you see in this photo?"
)
print("AI:", response1)
# Continue with follow-up questions
response2 = chatbot.add_text_message("What time of day do you think this was taken?")
print("AI:", response2)
response3 = chatbot.add_text_message("What activities would you recommend in this location?")
print("AI:", response3)
# Add another image to same conversation
response4 = chatbot.add_image_to_conversation(
    "another_photo.jpg",
    "How does this location compare to the first image?"
)
print("AI:", response4)