Prompt Testing Kit

A/B test system prompts against test cases with automated scoring across quality dimensions.

Compare two prompt variants (Prompt A and Prompt B) across 3 test cases, with 3 runs per case.

Integration Code

// Example: A/B test two system prompts across a fixed set of test cases.
import { createPromptTest } from 'agent-tools-kit/evaluation'

// The two system prompts under comparison — replace these placeholders
// with your actual prompt variants. (The original snippet referenced
// promptA/promptB without defining them, so it was not runnable as-is.)
const promptA = 'You are a helpful, concise assistant.'
const promptB = 'You are an expert assistant who explains your reasoning step by step.'

const test = createPromptTest({
  model: 'gpt-4o',
  runsPerCase: 3, // presumably each case is run 3 times to smooth out variance — confirm in library docs
  scoring: ['relevance', 'accuracy', 'tone'], // automated scoring dimensions
  testCases: [
    { input: 'What are the benefits of solar energy?' },
    { input: 'Explain quantum computing to a 10-year-old' },
    { input: 'Write a professional email declining a meeting' },
  ],
})

// Run both prompts against every test case and score the outputs.
// Top-level await requires an ES module context (.mjs or "type": "module").
const results = await test.compare(
  { id: 'A', systemPrompt: promptA },
  { id: 'B', systemPrompt: promptB },
)

console.log('Winner:', results.winner)       // id of the higher-scoring prompt
console.log('Score diff:', results.scoreDiff)