Prompt Testing Kit
A/B test system prompts against test cases with automated scoring across quality dimensions.
Prompt A
Prompt B
3
3 test cases
Integration Code
import { createPromptTest } from 'agent-tools-kit/evaluation'
// Build a prompt test from a model name, a per-case run count, a list of
// scoring dimension names, and one `{ input }` object per sample user message.
// NOTE(review): the exact meaning of `runsPerCase` and `scoring` is defined by
// agent-tools-kit, not by this snippet — confirm against its documentation.
const test = createPromptTest({
  model: 'gpt-4o',
  runsPerCase: 3,
  scoring: ['relevance', 'accuracy', 'tone'],
  testCases: [
    'What are the benefits of solar energy?',
    'Explain quantum computing to a 10-year-old',
    'Write a professional email declining a meeting',
  ].map((input) => ({ input })),
});

// Compare the two system prompts under the ids 'A' and 'B'.
// `promptA` and `promptB` are not defined in this snippet; they must already
// be in scope where it is pasted.
const results = await test.compare(
  { id: 'A', systemPrompt: promptA },
  { id: 'B', systemPrompt: promptB },
);

// Report the two fields this snippet reads from the comparison result.
console.log('Winner:', results.winner);
console.log('Score diff:', results.scoreDiff);