Supported Providers

Flo AI supports multiple LLM providers behind a consistent interface, so you can switch between models and providers with minimal code changes.
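
For example, swapping providers is typically a one-line change (a minimal sketch; it assumes the AgentBuilder pattern used later on this page):

from flo_ai.llm import OpenAI, Anthropic

# The agent definition stays the same; only the LLM object changes
llm = OpenAI(model='gpt-4o')
# llm = Anthropic(model='claude-3-5-sonnet-20240620')  # drop-in replacement

agent = (
    AgentBuilder()
    .with_name('Portable Agent')
    .with_prompt('You are a helpful assistant.')
    .with_llm(llm)
    .build()
)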

OpenAI

Basic Configuration

from flo_ai.llm import OpenAI

# Basic OpenAI configuration
llm = OpenAI(
    model='gpt-4o',
    temperature=0.7,
    max_tokens=1000
)

# With additional parameters
llm = OpenAI(
    model='gpt-4o-mini',
    temperature=0.3,
    max_tokens=500,
    timeout=30,
    api_key='your-api-key'  # Optional, can use environment variable
)

Available Models

# GPT-4 models
gpt4 = OpenAI(model='gpt-4o')
gpt4_mini = OpenAI(model='gpt-4o-mini')

# GPT-3.5 models
gpt35 = OpenAI(model='gpt-3.5-turbo')
gpt35_16k = OpenAI(model='gpt-3.5-turbo-16k')

Streaming Support

# Enable streaming for real-time responses
streaming_llm = OpenAI(
    model='gpt-4o',
    stream=True
)

# Use with agent
agent = (
    AgentBuilder()
    .with_name('Streaming Agent')
    .with_prompt('You are a helpful assistant.')
    .with_llm(streaming_llm)
    .build()
)
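
How you consume the stream depends on the agent interface; as a rough sketch, assuming (hypothetically) that a streaming agent yields text chunks from an async iterator:

import asyncio

async def main():
    # Hypothetical consumption pattern; check the streaming docs for the
    # actual interface exposed by your Flo AI version.
    async for chunk in agent.run('Tell me a story'):
        print(chunk, end='', flush=True)

asyncio.run(main())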

Anthropic Claude

Basic Configuration

from flo_ai.llm import Anthropic

# Basic Claude configuration
claude = Anthropic(
    model='claude-3-5-sonnet-20240620',
    temperature=0.7,
    max_tokens=1000
)

# With additional parameters
claude = Anthropic(
    model='claude-3-5-haiku-20241022',
    temperature=0.3,
    max_tokens=500,
    timeout=30
)

Available Models

# Claude 3.5 models
claude_sonnet = Anthropic(model='claude-3-5-sonnet-20240620')
claude_haiku = Anthropic(model='claude-3-5-haiku-20241022')

# Claude 3 models
claude_3_sonnet = Anthropic(model='claude-3-sonnet-20240229')
claude_3_haiku = Anthropic(model='claude-3-haiku-20240307')

Google Gemini

Basic Configuration

from flo_ai.llm import Gemini

# Basic Gemini configuration
gemini = Gemini(
    model='gemini-2.5-flash',
    temperature=0.7,
    max_tokens=1000
)

# With additional parameters
gemini = Gemini(
    model='gemini-2.5-pro',
    temperature=0.3,
    max_tokens=500,
    timeout=30
)

Available Models

# Gemini 2.5 models
gemini_flash = Gemini(model='gemini-2.5-flash')
gemini_pro = Gemini(model='gemini-2.5-pro')

# Gemini 1.5 models
gemini_15_flash = Gemini(model='gemini-1.5-flash')
gemini_15_pro = Gemini(model='gemini-1.5-pro')

Google Vertex AI

Configuration

from flo_ai.llm import VertexAI

# Vertex AI configuration
vertex_llm = VertexAI(
    model='gemini-2.5-flash',
    project='your-project-id',
    location='us-central1',
    temperature=0.7
)

# With service account
vertex_llm = VertexAI(
    model='gemini-2.5-pro',
    project='your-project-id',
    credentials_path='path/to/service-account.json',
    location='us-central1'
)

Ollama (Local)

Configuration

from flo_ai.llm import OllamaLLM

# Local Ollama configuration
ollama = OllamaLLM(
    model='llama2',
    base_url='http://localhost:11434',
    temperature=0.7
)

# With custom parameters
ollama = OllamaLLM(
    model='codellama',
    base_url='http://localhost:11434',
    temperature=0.3,
    timeout=60
)

Available Models

# Code generation
codellama = OllamaLLM(model='codellama')

# General purpose
llama2 = OllamaLLM(model='llama2')
llama3 = OllamaLLM(model='llama3')

# Specialized models
mistral = OllamaLLM(model='mistral')
phi = OllamaLLM(model='phi')
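
Each of these models must already be pulled into your local Ollama instance (for example: ollama pull llama3) before Flo AI can use it.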

Provider Comparison

| Provider | Best For | Cost | Speed | Quality |
|----------|----------|------|-------|---------|
| GPT-4o | Complex reasoning | High | Medium | Excellent |
| GPT-4o-mini | Balanced tasks | Medium | Fast | Good |
| Claude-3.5-Sonnet | Creative writing | High | Medium | Excellent |
| Claude-3.5-Haiku | Simple tasks | Low | Fast | Good |
| Gemini-2.5-Pro | Multimodal tasks | Medium | Medium | Good |
| Gemini-2.5-Flash | Fast responses | Low | Very Fast | Good |
| Ollama | Privacy/Offline | Free | Variable | Variable |

Model Selection Guide

For Different Use Cases

# Code generation and analysis
code_llm = OpenAI(model='gpt-4o', temperature=0.1)

# Creative writing
creative_llm = Anthropic(model='claude-3-5-sonnet-20240620', temperature=0.8)

# Data analysis
analysis_llm = OpenAI(model='gpt-4o', temperature=0.2)

# Customer support
support_llm = OpenAI(model='gpt-4o-mini', temperature=0.3)

# Fast responses
fast_llm = Gemini(model='gemini-2.5-flash', temperature=0.3)

Performance Optimization

# For high-volume, simple tasks
efficient_llm = OpenAI(
    model='gpt-4o-mini',
    temperature=0.1,
    max_tokens=200
)

# For complex reasoning
powerful_llm = OpenAI(
    model='gpt-4o',
    temperature=0.2,
    max_tokens=2000
)

Environment Configuration

API Keys

# OpenAI
export OPENAI_API_KEY="your-openai-key"

# Anthropic
export ANTHROPIC_API_KEY="your-anthropic-key"

# Google
export GOOGLE_API_KEY="your-google-key"

# Vertex AI
export GOOGLE_APPLICATION_CREDENTIALS="path/to/service-account.json"
export GOOGLE_CLOUD_PROJECT="your-project-id"

Python Configuration

import os
from flo_ai.llm import OpenAI

# Configure with environment variables
llm = OpenAI(
    model='gpt-4o',
    api_key=os.getenv('OPENAI_API_KEY')
)

# Or use default environment variable names
llm = OpenAI(model='gpt-4o')  # Automatically uses OPENAI_API_KEY
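
If you prefer file-based configuration, keys can also be loaded from a .env file (a sketch using the third-party python-dotenv package, which is not part of Flo AI):

import os
from dotenv import load_dotenv  # pip install python-dotenv
from flo_ai.llm import OpenAI

load_dotenv()  # reads OPENAI_API_KEY and friends from a local .env file
llm = OpenAI(model='gpt-4o', api_key=os.getenv('OPENAI_API_KEY'))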

Advanced Configuration

Custom Headers

# Add custom headers for API requests
llm = OpenAI(
    model='gpt-4o',
    headers={
        'X-Custom-Header': 'value',
        'User-Agent': 'MyApp/1.0'
    }
)

Retry Configuration

# Configure retry behavior
llm = OpenAI(
    model='gpt-4o',
    max_retries=3,
    retry_delay=1.0,
    timeout=30
)

Rate Limiting

# Configure rate limiting
llm = OpenAI(
    model='gpt-4o',
    requests_per_minute=60,
    tokens_per_minute=150000
)
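
As a complementary, provider-agnostic approach, you can also throttle on the client side (an illustrative sketch; MinuteRateLimiter is not part of Flo AI):

import asyncio
import time

class MinuteRateLimiter:
    """Allow at most `limit` calls per rolling 60-second window."""

    def __init__(self, limit: int):
        self.limit = limit
        self.calls: list[float] = []
        self.lock = asyncio.Lock()

    async def acquire(self) -> None:
        while True:
            async with self.lock:
                now = time.monotonic()
                # Drop timestamps older than the 60-second window
                self.calls = [t for t in self.calls if now - t < 60]
                if len(self.calls) < self.limit:
                    self.calls.append(now)
                    return
                wait = 60 - (now - self.calls[0])
            await asyncio.sleep(wait)

Call await limiter.acquire() immediately before each model request to stay under the limit.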

Model Switching

Dynamic Model Selection

from flo_ai.llm import OpenAI, Anthropic, Gemini

def get_llm_for_task(task_type: str):
    if task_type == 'creative':
        return Anthropic(model='claude-3-5-sonnet-20240620')
    elif task_type == 'analytical':
        return OpenAI(model='gpt-4o')
    elif task_type == 'fast':
        return Gemini(model='gemini-2.5-flash')
    else:
        return OpenAI(model='gpt-4o-mini')

# Use in agent
task_type = 'creative'
llm = get_llm_for_task(task_type)
agent = AgentBuilder().with_llm(llm).build()

A/B Testing

# Test different models
models = [
    OpenAI(model='gpt-4o'),
    Anthropic(model='claude-3-5-sonnet-20240620'),
    Gemini(model='gemini-2.5-pro')
]

# Run the comparison inside an async context
async def compare_models():
    for i, llm in enumerate(models):
        agent = AgentBuilder().with_llm(llm).build()
        response = await agent.run('Test prompt')
        print(f"Model {i+1}: {response}")

Troubleshooting

Common Issues

Ensure your API keys are correctly set:

echo $OPENAI_API_KEY
echo $ANTHROPIC_API_KEY
echo $GOOGLE_API_KEY

If you hit rate limits, implement backoff:

import asyncio
import random

from openai import RateLimitError  # or your provider's equivalent exception

async def with_backoff(func, max_retries=3):
    # Exponential backoff with jitter; asyncio.sleep keeps the event loop free
    for attempt in range(max_retries):
        try:
            return await func()
        except RateLimitError:
            wait_time = (2 ** attempt) + random.uniform(0, 1)
            await asyncio.sleep(wait_time)
    raise Exception("Max retries exceeded")
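
Call it from inside an async context (a sketch; agent is any agent built as in the earlier examples):

response = await with_backoff(lambda: agent.run('Test prompt'))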

Check that the model name is correct and available in your region:

from flo_ai.llm import OpenAI

# Constructing the client should fail fast if the model name is invalid
try:
    llm = OpenAI(model='gpt-4o')
    print("Model is available")
except Exception as e:
    print(f"Model error: {e}")

Best Practices

Model Selection

  1. Start with GPT-4o-mini for most tasks
  2. Use GPT-4o for complex reasoning
  3. Try Claude for creative tasks
  4. Use Gemini for multimodal or fast responses
  5. Use Ollama for privacy-sensitive applications

Cost Optimization

  1. Use appropriate models for task complexity
  2. Implement caching for repeated queries (see the sketch after this list)
  3. Set reasonable limits on max_tokens
  4. Monitor usage and costs
  5. Use streaming for long responses
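
A minimal in-memory cache for repeated prompts (a sketch; it assumes agent.run() returns a string, as in the examples above):

import hashlib

_response_cache: dict[str, str] = {}

async def cached_run(agent, prompt: str) -> str:
    # Key the cache on the exact prompt text
    key = hashlib.sha256(prompt.encode()).hexdigest()
    if key not in _response_cache:
        _response_cache[key] = await agent.run(prompt)
    return _response_cache[key]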

Performance Tips

  1. Batch requests when possible (see the sketch after this list)
  2. Use connection pooling for high-volume applications
  3. Implement retry logic with exponential backoff
  4. Cache responses for identical inputs
  5. Monitor latency and optimize accordingly
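
For batching, asyncio.gather works well (a sketch assuming agent.run() is a coroutine, as in the A/B testing example above):

import asyncio

async def run_batch(agent, prompts: list[str]) -> list[str]:
    # Fire all prompts concurrently rather than awaiting them one by one
    return await asyncio.gather(*(agent.run(p) for p in prompts))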