LLM Prompt Templates: Building Maintainable Prompt Systems

Introduction

Hardcoded prompts are a maintenance nightmare. When prompts are scattered across your codebase as string literals, updating them requires code changes, testing, and deployment. Prompt templates solve this by separating prompt logic from application code. This guide covers building a robust prompt template system: variable substitution, conditional sections, template inheritance, version control, and A/B testing. These patterns let you iterate on prompts without touching code, enabling faster experimentation and safer production deployments.

[Figure: Prompt templates, from variables to final prompt]

Basic Template System

import re

class SimplePromptTemplate:
    """Basic prompt template with variable substitution."""
    
    def __init__(self, template: str):
        self.template = template
        self.variables = self._extract_variables()
    
    def _extract_variables(self) -> set[str]:
        """Extract variable names from template."""
        # Match {variable_name} patterns
        return set(re.findall(r'\{(\w+)\}', self.template))
    
    def render(self, **kwargs) -> str:
        """Render template with provided variables."""
        
        # Check for missing variables
        missing = self.variables - set(kwargs.keys())
        if missing:
            raise ValueError(f"Missing variables: {missing}")
        
        # Substitute variables
        result = self.template
        for key, value in kwargs.items():
            result = result.replace(f"{{{key}}}", str(value))
        
        return result

# Usage
template = SimplePromptTemplate("""
You are a {role} assistant.

Task: {task}

Context:
{context}

Please provide a {output_format} response.
""")

prompt = template.render(
    role="helpful coding",
    task="Review this Python function for bugs",
    context="def add(a, b): return a - b",
    output_format="detailed"
)

print(prompt)
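
The upfront variable check makes template errors fail fast. A quick sketch of the failure path, using the template above:

# Missing variables raise immediately instead of producing a half-filled prompt
try:
    template.render(role="helpful coding", task="Review this Python function for bugs")
except ValueError as e:
    print(e)  # e.g. "Missing variables: {'context', 'output_format'}" (set order varies)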

Advanced Template with Conditionals

import jinja2

class JinjaPromptTemplate:
    """Prompt template using Jinja2 for advanced features."""
    
    def __init__(self, template: str):
        self.env = jinja2.Environment(
            undefined=jinja2.StrictUndefined,
            trim_blocks=True,
            lstrip_blocks=True
        )
        self.template = self.env.from_string(template)
    
    def render(self, **kwargs) -> str:
        """Render template with Jinja2."""
        return self.template.render(**kwargs)

# Template with conditionals and loops
advanced_template = JinjaPromptTemplate("""
You are a {{ role }} assistant.

{% if system_instructions %}
System Instructions:
{{ system_instructions }}
{% endif %}

Task: {{ task }}

{% if examples %}
Examples:
{% for example in examples %}
Input: {{ example.input }}
Output: {{ example.output }}
{% endfor %}
{% endif %}

{% if constraints %}
Constraints:
{% for constraint in constraints %}
- {{ constraint }}
{% endfor %}
{% endif %}

User Input: {{ user_input }}

{% if output_format == "json" %}
Respond with valid JSON only.
{% elif output_format == "markdown" %}
Format your response using Markdown.
{% else %}
Provide a clear, concise response.
{% endif %}
""")

prompt = advanced_template.render(
    role="data analysis",
    task="Analyze the provided dataset",
    # StrictUndefined treats any referenced-but-missing name as an error, even
    # inside {% if %} blocks, so optional sections are passed explicitly as None
    system_instructions=None,
    examples=[
        {"input": "sales data", "output": "trend analysis"},
        {"input": "user metrics", "output": "engagement report"}
    ],
    constraints=["Be concise", "Use specific numbers", "Cite sources"],
    user_input="Q3 revenue figures",
    output_format="json"
)

print(prompt)
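
StrictUndefined is what makes this template class safe in production: a misspelled or forgotten variable raises instead of silently rendering as an empty string. A minimal sketch of that failure mode:

# Forgetting any referenced variable raises jinja2.UndefinedError
try:
    advanced_template.render(role="data analysis", task="Analyze the provided dataset")
except jinja2.UndefinedError as e:
    print(e)  # e.g. "'system_instructions' is undefined"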

Template Registry

from pathlib import Path
from dataclasses import dataclass
import yaml

@dataclass
class PromptConfig:
    name: str
    version: str
    template: str
    model: str = "gpt-4o"
    temperature: float = 0.7
    max_tokens: int = 1000
    description: str = ""

class PromptRegistry:
    """Centralized prompt template management."""
    
    def __init__(self, prompts_dir: Path = Path("./prompts")):
        self.prompts_dir = prompts_dir
        self.prompts: dict[str, PromptConfig] = {}
        self._load_prompts()
    
    def _load_prompts(self):
        """Load all prompts from directory."""
        
        if not self.prompts_dir.exists():
            return
        
        for file_path in self.prompts_dir.glob("*.yaml"):
            config = yaml.safe_load(file_path.read_text())
            
            prompt = PromptConfig(
                name=config["name"],
                version=config.get("version", "1.0"),
                template=config["template"],
                model=config.get("model", "gpt-4o"),
                temperature=config.get("temperature", 0.7),
                max_tokens=config.get("max_tokens", 1000),
                description=config.get("description", "")
            )
            
            self.prompts[prompt.name] = prompt
    
    def get(self, name: str) -> PromptConfig:
        """Get prompt by name."""
        
        if name not in self.prompts:
            raise KeyError(f"Prompt not found: {name}")
        
        return self.prompts[name]
    
    def render(self, name: str, **kwargs) -> tuple[str, PromptConfig]:
        """Render prompt and return with config."""
        
        config = self.get(name)
        template = JinjaPromptTemplate(config.template)
        rendered = template.render(**kwargs)
        
        return rendered, config
    
    def list_prompts(self) -> list[str]:
        """List all available prompts."""
        return list(self.prompts.keys())

# Example prompt YAML file (prompts/summarize.yaml):
SUMMARIZE_YAML = """
name: summarize
version: "2.1"
description: Summarize text content
model: gpt-4o-mini
temperature: 0.3
max_tokens: 500
template: |
  Summarize the following {{ content_type }} in {{ length }} sentences.
  
  {% if focus_areas %}
  Focus on: {{ focus_areas | join(", ") }}
  {% endif %}
  
  Content:
  {{ content }}
  
  Summary:
"""

# Usage
registry = PromptRegistry(Path("./prompts"))

prompt, config = registry.render(
    "summarize",
    content_type="article",
    length=3,
    focus_areas=["key findings", "implications"],
    content="Long article text here..."
)

# Use config for API call
from openai import OpenAI
client = OpenAI()

response = client.chat.completions.create(
    model=config.model,
    messages=[{"role": "user", "content": prompt}],
    temperature=config.temperature,
    max_tokens=config.max_tokens
)

Template Inheritance

class InheritableTemplate:
    """Templates that can extend base templates."""
    
    def __init__(self):
        self.templates: dict[str, str] = {}
        self.parents: dict[str, str] = {}
    
    def register(self, name: str, template: str, extends: str | None = None):
        """Register a template, optionally extending another."""
        
        self.templates[name] = template
        if extends:
            self.parents[name] = extends
    
    def _get_full_template(self, name: str) -> str:
        """Get template with inheritance resolved."""
        
        if name not in self.templates:
            raise KeyError(f"Template not found: {name}")
        
        template = self.templates[name]
        
        # Check for parent
        if name in self.parents:
            parent_name = self.parents[name]
            parent_template = self._get_full_template(parent_name)
            
            # Replace {child_content} in parent with child template
            template = parent_template.replace("{child_content}", template)
        
        return template
    
    def render(self, name: str, **kwargs) -> str:
        """Render template with inheritance."""
        
        full_template = self._get_full_template(name)
        jinja_template = JinjaPromptTemplate(full_template)
        
        return jinja_template.render(**kwargs)

# Usage
templates = InheritableTemplate()

# Base template
templates.register("base", """
You are a {{ role }} assistant created by {{ company }}.

Core principles:
- Be helpful and accurate
- Admit when you don't know something
- Follow safety guidelines

{child_content}
""")

# Child templates extending base
templates.register("customer_support", """
You are handling a customer support inquiry.

Customer: {{ customer_name }}
Issue: {{ issue }}

Previous interactions:
{% for interaction in history %}
- {{ interaction }}
{% endfor %}

Please help resolve this issue professionally.
""", extends="base")

templates.register("code_review", """
You are reviewing code for quality and bugs.

Language: {{ language }}
Code:
```{{ language }}
{{ code }}
```

Review for: {{ review_focus | join(", ") }}
""", extends="base")

# Render child template (includes base)
prompt = templates.render(
    "customer_support",
    role="customer support",
    company="TechCorp",
    customer_name="John",
    issue="Cannot login to account",
    history=["Tried password reset", "Cleared cookies"]
)

print(prompt)
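
Because _get_full_template resolves parents recursively, inheritance chains deeper than one level also work. A sketch with a hypothetical intermediate layer:

# Hypothetical chain: base -> support_base -> billing_support
templates.register("support_base", """
Always include the ticket reference in your reply.

{child_content}
""", extends="base")

templates.register("billing_support", """
Billing issue: {{ issue }}
""", extends="support_base")

prompt = templates.render(
    "billing_support",
    role="billing support",
    company="TechCorp",
    issue="Duplicate charge on invoice #1042"
)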

A/B Testing Prompts

import random
import hashlib
from dataclasses import dataclass
from datetime import datetime

@dataclass
class PromptVariant:
    name: str
    template: str
    weight: float = 1.0

@dataclass
class ABTestResult:
    variant_name: str
    prompt: str
    test_id: str

class PromptABTester:
    """A/B test different prompt variants."""
    
    def __init__(self, test_name: str):
        self.test_name = test_name
        self.variants: list[PromptVariant] = []
        self.results: list[dict] = []
    
    def add_variant(self, name: str, template: str, weight: float = 1.0):
        """Add a prompt variant."""
        self.variants.append(PromptVariant(name, template, weight))
    
    def select_variant(self, user_id: str | None = None) -> PromptVariant:
        """Select variant (deterministic if user_id provided)."""
        
        if user_id:
            # Deterministic selection based on user_id
            hash_input = f"{self.test_name}:{user_id}"
            hash_value = int(hashlib.md5(hash_input.encode()).hexdigest(), 16)
            
            total_weight = sum(v.weight for v in self.variants)
            threshold = (hash_value % 1000) / 1000 * total_weight
            
            cumulative = 0
            for variant in self.variants:
                cumulative += variant.weight
                if threshold < cumulative:
                    return variant
            
            return self.variants[-1]
        else:
            # Random selection weighted by weight
            weights = [v.weight for v in self.variants]
            return random.choices(self.variants, weights=weights)[0]
    
    def render(self, user_id: str | None = None, **kwargs) -> ABTestResult:
        """Render a selected variant (deterministic when user_id is given)."""
        
        variant = self.select_variant(user_id)
        template = JinjaPromptTemplate(variant.template)
        prompt = template.render(**kwargs)
        
        test_id = f"{self.test_name}_{datetime.now().timestamp()}"
        
        return ABTestResult(
            variant_name=variant.name,
            prompt=prompt,
            test_id=test_id
        )
    
    def record_result(self, test_id: str, variant_name: str, metrics: dict):
        """Record test result for analysis."""
        
        self.results.append({
            "test_id": test_id,
            "variant": variant_name,
            "timestamp": datetime.now().isoformat(),
            **metrics
        })
    
    def analyze(self) -> dict:
        """Analyze A/B test results."""
        
        from collections import defaultdict
        
        variant_metrics = defaultdict(lambda: {"count": 0, "scores": []})
        
        for result in self.results:
            variant = result["variant"]
            variant_metrics[variant]["count"] += 1
            
            if "score" in result:
                variant_metrics[variant]["scores"].append(result["score"])
        
        analysis = {}
        for variant, metrics in variant_metrics.items():
            scores = metrics["scores"]
            analysis[variant] = {
                "count": metrics["count"],
                "avg_score": sum(scores) / len(scores) if scores else 0,
                "min_score": min(scores) if scores else 0,
                "max_score": max(scores) if scores else 0
            }
        
        return analysis

# Usage
tester = PromptABTester("summarization_v2")

tester.add_variant(
    "concise",
    "Summarize in 2 sentences: {{ text }}",
    weight=1.0
)

tester.add_variant(
    "detailed",
    "Provide a comprehensive summary with key points: {{ text }}",
    weight=1.0
)

tester.add_variant(
    "bullet_points",
    "Summarize as 3-5 bullet points: {{ text }}",
    weight=0.5  # Less traffic to this variant
)

# Get prompt for user
result = tester.render(user_id="user_123", text="Long article...")

print(f"Using variant: {result.variant_name}")
print(f"Prompt: {result.prompt}")

Production Template Service

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional

app = FastAPI()

class RenderRequest(BaseModel):
    template_name: str
    variables: dict
    user_id: Optional[str] = None

class RenderResponse(BaseModel):
    prompt: str
    model: str
    temperature: float
    variant: Optional[str] = None

# Initialize registry and A/B testers
registry = PromptRegistry(Path("./prompts"))
ab_tests: dict[str, PromptABTester] = {}

@app.post("/render", response_model=RenderResponse)
async def render_prompt(request: RenderRequest):
    """Render a prompt template."""
    
    try:
        # Check if this template has an A/B test
        if request.template_name in ab_tests:
            tester = ab_tests[request.template_name]
            result = tester.render(
                user_id=request.user_id,
                **request.variables
            )
            
            config = registry.get(request.template_name)
            
            return RenderResponse(
                prompt=result.prompt,
                model=config.model,
                temperature=config.temperature,
                variant=result.variant_name
            )
        
        # Standard rendering
        prompt, config = registry.render(
            request.template_name,
            **request.variables
        )
        
        return RenderResponse(
            prompt=prompt,
            model=config.model,
            temperature=config.temperature
        )
    
    except KeyError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

@app.get("/templates")
async def list_templates():
    """List available templates."""
    return {"templates": registry.list_prompts()}

@app.post("/reload")
async def reload_templates():
    """Reload templates from disk."""
    global registry
    registry = PromptRegistry(Path("./prompts"))
    return {"status": "reloaded", "count": len(registry.prompts)}

Conclusion

Prompt templates transform prompt management from ad-hoc string manipulation to a proper engineering discipline. Start with simple variable substitution, then add Jinja2 for conditionals and loops. Build a registry to centralize prompts with their configurations. Use template inheritance to share common patterns across prompts. Implement A/B testing to systematically improve prompts based on data. Store templates in version control alongside code, but load them dynamically so you can update prompts without redeploying. The investment in template infrastructure pays off quickly—you’ll iterate faster, make fewer mistakes, and have clear visibility into what prompts are running in production.

