19 min read
A/B Testing
Master statistical testing and experimentation with proper sample sizes, significance calculations, and experiment design.
What is A/B Testing?
A/B testing (split testing) is a controlled experiment comparing two or more versions of a page, element, or experience to determine which performs better.
code
A/B Test Structure:
Traffic (100%)
│
┌──────┴──────┐
│ │
▼ ▼
Control Variant
(50%) (50%)
│ │
▼ ▼
[Original] [Change]
│ │
▼ ▼
Conversion Conversion
Rate: 2% Rate: 2.5%
│ │
└──────┬──────┘
│
▼
Statistical Analysis
│
▼
Significant? (p < 0.05)
│ │
Yes No
│ │
▼ ▼
Winner!      Continue
Why A/B Test?
Intuition fails often: even expert predictions about which design will win are correct only about half the time, no better than flipping a coin.
Statistical Foundations
Key Statistical Concepts
code
interface ABTestStatistics {
// Sample size: Number of visitors per variation
sampleSize: number;
// Baseline conversion rate: Current/control conversion rate
baselineRate: number;
// Minimum Detectable Effect (MDE): Smallest improvement worth detecting
mde: number;
// Significance level (α): acceptable false-positive rate under the null
// Standard: α = 0.05 (95% confidence)
significanceLevel: number;
// Statistical power: Probability of detecting a real effect
// Standard: 80% (β = 0.20)
power: number;
// p-value: Probability of results at least this extreme if there's no real difference
pValue: number;
// Confidence interval: Range where true value likely falls
confidenceInterval: [number, number];
}
Sample Size Calculation
code
class SampleSizeCalculator {
/**
* Calculate required sample size per variation
* Uses the standard formula for comparing two proportions
*/
calculate(params: {
baselineRate: number; // e.g., 0.02 for 2%
mde: number; // e.g., 0.20 for 20% relative lift
significanceLevel?: number; // default 0.05
power?: number; // default 0.80
}): number {
const {
baselineRate,
mde,
significanceLevel = 0.05,
power = 0.80
} = params;
// Target rate after improvement
const targetRate = baselineRate * (1 + mde);
// Pooled probability
const pooledP = (baselineRate + targetRate) / 2;
// Z-scores for significance and power
const zAlpha = this.getZScore(1 - significanceLevel / 2); // two-tailed
const zBeta = this.getZScore(power);
// Sample size formula
const numerator = 2 * pooledP * (1 - pooledP) * Math.pow(zAlpha + zBeta, 2);
const denominator = Math.pow(targetRate - baselineRate, 2);
return Math.ceil(numerator / denominator);
}
/**
* Calculate test duration based on traffic
*/
calculateDuration(params: {
sampleSizePerVariation: number;
dailyVisitors: number;
numberOfVariations: number;
trafficAllocation?: number; // default 1.0 (100%)
}): number {
const {
sampleSizePerVariation,
dailyVisitors,
numberOfVariations,
trafficAllocation = 1.0
} = params;
const totalSampleNeeded = sampleSizePerVariation * numberOfVariations;
const dailyTestTraffic = dailyVisitors * trafficAllocation;
return Math.ceil(totalSampleNeeded / dailyTestTraffic);
}
private getZScore(probability: number): number {
// Approximation of inverse normal CDF
const a1 = -3.969683028665376e+01;
const a2 = 2.209460984245205e+02;
const a3 = -2.759285104469687e+02;
const a4 = 1.383577518672690e+02;
const a5 = -3.066479806614716e+01;
const a6 = 2.506628277459239e+00;
const b1 = -5.447609879822406e+01;
const b2 = 1.615858368580409e+02;
const b3 = -1.556989798598866e+02;
const b4 = 6.680131188771972e+01;
const b5 = -1.328068155288572e+01;
const c1 = -7.784894002430293e-03;
const c2 = -3.223964580411365e-01;
const c3 = -2.400758277161838e+00;
const c4 = -2.549732539343734e+00;
const c5 = 4.374664141464968e+00;
const c6 = 2.938163982698783e+00;
const d1 = 7.784695709041462e-03;
const d2 = 3.224671290700398e-01;
const d3 = 2.445134137142996e+00;
const d4 = 3.754408661907416e+00;
const pLow = 0.02425;
const pHigh = 1 - pLow;
let q, r;
if (probability < pLow) {
q = Math.sqrt(-2 * Math.log(probability));
return (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q+c6) /
((((d1*q+d2)*q+d3)*q+d4)*q+1);
} else if (probability <= pHigh) {
q = probability - 0.5;
r = q * q;
return (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r+a6)*q /
(((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r+1);
} else {
q = Math.sqrt(-2 * Math.log(1 - probability));
return -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q+c6) /
((((d1*q+d2)*q+d3)*q+d4)*q+1);
}
}
}
// Example usage
const calculator = new SampleSizeCalculator();
const sampleSize = calculator.calculate({
baselineRate: 0.02, // 2% current conversion rate
mde: 0.20, // Want to detect 20% relative improvement
significanceLevel: 0.05,
power: 0.80
});
console.log(`Required sample size per variation: ${sampleSize}`);
// Output: Required sample size per variation: 21110
// (≈21k visitors per arm to detect a lift from 2.0% to 2.4%)
const duration = calculator.calculateDuration({
sampleSizePerVariation: sampleSize,
dailyVisitors: 5000,
numberOfVariations: 2
});
console.log(`Test duration: ${duration} days`);
// Output: Test duration: 9 days
// (21110 × 2 variations ≈ 42220 visitors ÷ 5000 per day → 9 days)
Statistical Significance Testing
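The analyzer below consumes and returns a few shapes the article does not spell out. A minimal sketch of those types, as assumed here:
code
interface TestData {
  visitors: number;
  conversions: number;
}
interface TestResult {
  controlRate: number;        // percent
  variantRate: number;        // percent
  relativeLift: number;       // percent
  absoluteLift: number;       // percentage points
  zScore: number;
  pValue: number;
  isSignificant: boolean;
  confidenceInterval: { lower: number; upper: number };
  recommendation: string;
}
interface BayesianResult {
  probabilityVariantBetter: number;  // percent
  probabilityControlBetter: number;  // percent
  expectedLift: number;              // percent
  recommendation: string;
}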
code
class ABTestAnalyzer {
/**
* Calculate statistical significance using two-proportion z-test
*/
analyzeTest(control: TestData, variant: TestData): TestResult {
const controlRate = control.conversions / control.visitors;
const variantRate = variant.conversions / variant.visitors;
// Pooled proportion
const totalConversions = control.conversions + variant.conversions;
const totalVisitors = control.visitors + variant.visitors;
const pooledP = totalConversions / totalVisitors;
// Standard error
const se = Math.sqrt(
pooledP * (1 - pooledP) * (1 / control.visitors + 1 / variant.visitors)
);
// Z-score
const zScore = (variantRate - controlRate) / se;
// Two-tailed p-value
const pValue = 2 * (1 - this.normalCDF(Math.abs(zScore)));
// Confidence interval (95%)
const marginOfError = 1.96 * se;
const relativeLift = (variantRate - controlRate) / controlRate;
return {
controlRate: controlRate * 100,
variantRate: variantRate * 100,
relativeLift: relativeLift * 100,
absoluteLift: (variantRate - controlRate) * 100,
zScore,
pValue,
isSignificant: pValue < 0.05,
confidenceInterval: {
lower: (relativeLift - marginOfError / controlRate) * 100,
upper: (relativeLift + marginOfError / controlRate) * 100
},
recommendation: this.getRecommendation(pValue, relativeLift)
};
}
/**
* Bayesian analysis alternative
*/
bayesianAnalysis(
control: TestData,
variant: TestData,
priorAlpha: number = 1,
priorBeta: number = 1
): BayesianResult {
// Posterior parameters (Beta distribution)
const controlAlpha = priorAlpha + control.conversions;
const controlBeta = priorBeta + control.visitors - control.conversions;
const variantAlpha = priorAlpha + variant.conversions;
const variantBeta = priorBeta + variant.visitors - variant.conversions;
// Monte Carlo simulation for probability of variant being better
const simulations = 100000;
let variantWins = 0;
for (let i = 0; i < simulations; i++) {
const controlSample = this.betaSample(controlAlpha, controlBeta);
const variantSample = this.betaSample(variantAlpha, variantBeta);
if (variantSample > controlSample) {
variantWins++;
}
}
const probabilityBetter = variantWins / simulations;
return {
probabilityVariantBetter: probabilityBetter * 100,
probabilityControlBetter: (1 - probabilityBetter) * 100,
expectedLift: this.calculateExpectedLift(
controlAlpha, controlBeta,
variantAlpha, variantBeta
),
recommendation: probabilityBetter >= 0.95
? 'Implement variant'
: probabilityBetter <= 0.05
? 'Keep control'
: 'Continue testing'
};
}
private normalCDF(x: number): number {
const a1 = 0.254829592;
const a2 = -0.284496736;
const a3 = 1.421413741;
const a4 = -1.453152027;
const a5 = 1.061405429;
const p = 0.3275911;
const sign = x < 0 ? -1 : 1;
x = Math.abs(x) / Math.sqrt(2);
const t = 1.0 / (1.0 + p * x);
const y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * Math.exp(-x * x);
return 0.5 * (1.0 + sign * y);
}
private betaSample(alpha: number, beta: number): number {
// Beta(α, β) sampled as Gamma(α) / (Gamma(α) + Gamma(β))
const gamma1 = this.gammaSample(alpha);
const gamma2 = this.gammaSample(beta);
return gamma1 / (gamma1 + gamma2);
}
private gammaSample(alpha: number): number {
// Marsaglia and Tsang's method
if (alpha < 1) {
return this.gammaSample(1 + alpha) * Math.pow(Math.random(), 1 / alpha);
}
const d = alpha - 1 / 3;
const c = 1 / Math.sqrt(9 * d);
while (true) {
let x, v;
do {
x = this.normalSample();
v = 1 + c * x;
} while (v <= 0);
v = v * v * v;
const u = Math.random();
if (u < 1 - 0.0331 * (x * x) * (x * x)) return d * v;
if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) return d * v;
}
}
private normalSample(): number {
// Box-Muller transform
const u1 = Math.random();
const u2 = Math.random();
return Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
}
private calculateExpectedLift(
cAlpha: number, cBeta: number,
vAlpha: number, vBeta: number
): number {
const controlMean = cAlpha / (cAlpha + cBeta);
const variantMean = vAlpha / (vAlpha + vBeta);
return ((variantMean - controlMean) / controlMean) * 100;
}
private getRecommendation(pValue: number, lift: number): string {
if (pValue >= 0.05) {
return 'Continue testing - not yet statistically significant';
}
if (lift > 0) {
return 'Implement variant - statistically significant improvement';
}
return 'Keep control - variant performs worse';
}
}
// Example usage
const analyzer = new ABTestAnalyzer();
const result = analyzer.analyzeTest(
{ visitors: 10000, conversions: 200 }, // Control: 2.0%
{ visitors: 10000, conversions: 250 } // Variant: 2.5%
);
console.log(result);
/*
{
controlRate: 2.0,
variantRate: 2.5,
relativeLift: 25,
absoluteLift: 0.5,
zScore: 2.38,
pValue: 0.017,
isSignificant: true,
confidenceInterval: { lower: 4.4, upper: 45.6 },
recommendation: 'Implement variant - statistically significant improvement'
}
*/
A/B Test Implementation
Client-Side Testing
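The client below assumes a small test-configuration shape and an analytics interface that the article does not define. A hedged sketch of what the code expects (variation weights are fractions summing to 1; the Plan shape is only used in the pricing-page example):
code
interface TestConfig {
  id: string;
  isActive: boolean;
  trafficAllocation: number;                          // 0–1, share of traffic entering the test
  variations: Array<{ id: string; weight: number }>;  // weights as fractions summing to 1
}
interface AnalyticsClient {
  track(event: string, properties: Record<string, any>): void;
}
interface Plan {
  id: string;
  price: number;
}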
code
import { createContext, useContext, useState, useEffect, useCallback } from 'react';
class ABTestClient {
private tests: Map<string, TestConfig> = new Map();
private assignments: Map<string, string> = new Map();
constructor(private analytics: AnalyticsClient) {
this.loadAssignments();
}
/**
* Register a test configuration
*/
registerTest(config: TestConfig) {
this.tests.set(config.id, config);
}
/**
* Get variation for a test
*/
getVariation(testId: string): string {
// Check if already assigned
if (this.assignments.has(testId)) {
return this.assignments.get(testId)!;
}
const test = this.tests.get(testId);
if (!test || !test.isActive) {
return 'control';
}
// Assign variation based on traffic allocation
const variation = this.assignVariation(test);
this.assignments.set(testId, variation);
this.saveAssignments();
// Track exposure
this.trackExposure(testId, variation);
return variation;
}
/**
* Track conversion for a test
*/
trackConversion(testId: string, value?: number) {
const variation = this.assignments.get(testId);
if (!variation) return;
this.analytics.track('ab_test_conversion', {
test_id: testId,
variation,
value,
timestamp: Date.now()
});
}
private assignVariation(test: TestConfig): string {
// Use deterministic hash for consistent assignment
const userId = this.getUserId();
const hash = this.hashString(`${userId}:${test.id}`);
const bucket = hash % 100;
// Check if user is in test (traffic allocation)
if (bucket >= test.trafficAllocation * 100) {
return 'control'; // Not in test
}
// Assign to variation
let cumulative = 0;
const variationBucket = hash % 1000;
for (const variation of test.variations) {
cumulative += variation.weight * 1000;
if (variationBucket < cumulative) {
return variation.id;
}
}
return 'control';
}
private hashString(str: string): number {
let hash = 0;
for (let i = 0; i < str.length; i++) {
const char = str.charCodeAt(i);
hash = ((hash << 5) - hash) + char;
hash = hash & hash;
}
return Math.abs(hash);
}
private getUserId(): string {
let userId = localStorage.getItem('ab_user_id');
if (!userId) {
userId = `user_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
localStorage.setItem('ab_user_id', userId);
}
return userId;
}
private trackExposure(testId: string, variation: string) {
this.analytics.track('ab_test_exposure', {
test_id: testId,
variation,
timestamp: Date.now(),
url: window.location.href
});
}
private loadAssignments() {
const stored = localStorage.getItem('ab_assignments');
if (stored) {
const parsed = JSON.parse(stored);
Object.entries(parsed).forEach(([k, v]) => {
this.assignments.set(k, v as string);
});
}
}
private saveAssignments() {
const obj: Record<string, string> = {};
this.assignments.forEach((v, k) => { obj[k] = v; });
localStorage.setItem('ab_assignments', JSON.stringify(obj));
}
}
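// The useABTest hook below calls useABClient(), which the article does not
// define. A minimal sketch (an assumption): expose the client via React context.
const ABTestContext = createContext<ABTestClient | null>(null);
function useABClient(): ABTestClient {
  const client = useContext(ABTestContext);
  if (!client) {
    throw new Error('Wrap the app in <ABTestContext.Provider value={client}>');
  }
  return client;
}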
// React hook for A/B testing
function useABTest(testId: string): {
variation: string;
trackConversion: (value?: number) => void;
} {
const abClient = useABClient();
const [variation, setVariation] = useState('control');
useEffect(() => {
const assigned = abClient.getVariation(testId);
setVariation(assigned);
}, [abClient, testId]);
const trackConversion = useCallback((value?: number) => {
abClient.trackConversion(testId, value);
}, [abClient, testId]);
return { variation, trackConversion };
}
// Usage in component
function PricingPage() {
const { variation, trackConversion } = useABTest('pricing_layout_test');
const handlePurchase = (plan: Plan) => {
trackConversion(plan.price);
// Continue with purchase flow
};
return (
<div>
{variation === 'control' && <PricingTableHorizontal onSelect={handlePurchase} />}
{variation === 'variant_a' && <PricingTableVertical onSelect={handlePurchase} />}
{variation === 'variant_b' && <PricingCards onSelect={handlePurchase} />}
</div>
);
}
Server-Side Testing
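The server-side client reads rows whose exact schema the article does not show. A minimal sketch of the test row and aggregated-results shapes assumed by the code below (the results shape mirrors the columns returned by the SQL function at the end of the block):
code
interface ABTest {
  id: string;
  status: 'draft' | 'active' | 'completed';
  traffic_allocation: number;                         // 0–1
  variations: Array<{ id: string; weight: number }>;  // weights sum to 1
}
interface VariationResult {
  variation: string;
  exposures: number;
  conversions: number;
  conversion_rate: number;   // percent
  total_value: number;
}
type TestResults = VariationResult[];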
code
// Server-side A/B test implementation
import { createClient } from '@supabase/supabase-js';
class ServerABTest {
private supabase;
constructor() {
this.supabase = createClient(
process.env.SUPABASE_URL!,
process.env.SUPABASE_SERVICE_KEY!
);
}
/**
* Get or assign variation for a user
*/
async getVariation(
testId: string,
userId: string,
sessionId?: string
): Promise<string> {
// Check existing assignment
const { data: existing } = await this.supabase
.from('ab_test_assignments')
.select('variation')
.eq('test_id', testId)
.eq('user_id', userId)
.single();
if (existing) {
return existing.variation;
}
// Get test config
const { data: test } = await this.supabase
.from('ab_tests')
.select('*')
.eq('id', testId)
.eq('status', 'active')
.single();
if (!test) {
return 'control';
}
// Assign variation
const variation = this.assignVariation(test, userId);
// Store assignment
await this.supabase.from('ab_test_assignments').insert({
test_id: testId,
user_id: userId,
session_id: sessionId,
variation,
assigned_at: new Date().toISOString()
});
// Track exposure
await this.trackExposure(testId, userId, variation);
return variation;
}
/**
* Track conversion event
*/
async trackConversion(
testId: string,
userId: string,
eventName: string,
value?: number,
metadata?: Record<string, any>
) {
const { data: assignment } = await this.supabase
.from('ab_test_assignments')
.select('variation')
.eq('test_id', testId)
.eq('user_id', userId)
.single();
if (!assignment) return;
await this.supabase.from('ab_test_conversions').insert({
test_id: testId,
user_id: userId,
variation: assignment.variation,
event_name: eventName,
value,
metadata,
converted_at: new Date().toISOString()
});
}
/**
* Get test results
*/
async getResults(testId: string): Promise<TestResults> {
const { data: results } = await this.supabase.rpc('get_ab_test_results', {
p_test_id: testId
});
return results;
}
private assignVariation(test: ABTest, userId: string): string {
const hash = this.hashString(`${userId}:${test.id}`);
const bucket = hash % 100;
if (bucket >= test.traffic_allocation * 100) {
return 'control';
}
let cumulative = 0;
const variationBucket = hash % 1000;
for (const variation of test.variations) {
cumulative += variation.weight * 1000;
if (variationBucket < cumulative) {
return variation.id;
}
}
return 'control';
}
private hashString(str: string): number {
let hash = 5381;
for (let i = 0; i < str.length; i++) {
hash = ((hash << 5) + hash) + str.charCodeAt(i);
}
return Math.abs(hash);
}
private async trackExposure(
testId: string,
userId: string,
variation: string
) {
await this.supabase.from('ab_test_exposures').insert({
test_id: testId,
user_id: userId,
variation,
exposed_at: new Date().toISOString()
});
}
}
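// Example usage (a sketch, not part of the original article): assumes an
// Express-style handler and that the authenticated user id is available
// on the request object as req.userId.
const serverTest = new ServerABTest();
async function pricingPageHandler(req: { userId: string }, res: { json: (body: any) => void }) {
  const variation = await serverTest.getVariation('pricing_layout_test', req.userId);
  res.json({ variation });
}
async function checkoutHandler(
  req: { userId: string; body: { plan: string; price: number } },
  res: { json: (body: any) => void }
) {
  await serverTest.trackConversion('pricing_layout_test', req.userId, 'purchase', req.body.price);
  res.json({ ok: true });
}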
// SQL function for results
/*
CREATE OR REPLACE FUNCTION get_ab_test_results(p_test_id UUID)
RETURNS TABLE (
variation TEXT,
exposures BIGINT,
conversions BIGINT,
conversion_rate NUMERIC,
total_value NUMERIC
) AS $$
BEGIN
RETURN QUERY
SELECT
e.variation,
COUNT(DISTINCT e.user_id) as exposures,
COUNT(DISTINCT c.user_id) as conversions,
ROUND(COUNT(DISTINCT c.user_id)::NUMERIC / NULLIF(COUNT(DISTINCT e.user_id), 0) * 100, 2) as conversion_rate,
COALESCE(SUM(c.value), 0) as total_value
FROM ab_test_exposures e
LEFT JOIN ab_test_conversions c
ON e.test_id = c.test_id
AND e.user_id = c.user_id
AND e.variation = c.variation
WHERE e.test_id = p_test_id
GROUP BY e.variation
ORDER BY e.variation;
END;
$$ LANGUAGE plpgsql;
*/
Common A/B Testing Mistakes
1. Stopping Tests Too Early
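The sequential-testing sketch below references a couple of types the article leaves implicit; minimal assumed definitions:
code
type SpendingFunction = number[];  // z-score boundary per interim look
interface StopDecision {
  stop: boolean;
  reason?: 'variant_wins' | 'control_wins' | 'max_looks_reached';
  adjustedPValue?: number;
}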
code
// BAD: Peeking and stopping at first significance
function badTestEvaluation(results: TestResults) {
// Day 2: p=0.03, significant!
if (results.pValue < 0.05) {
return 'Winner found!'; // WRONG - likely false positive
}
}
// GOOD: Sequential testing with proper stopping rules
class SequentialTest {
private boundaries: SpendingFunction;
constructor(
private alpha: number = 0.05,
private maxLooks: number = 5
) {
// O'Brien-Fleming spending function
this.boundaries = this.calculateBoundaries();
}
canStop(currentLook: number, zScore: number): StopDecision {
const boundary = this.boundaries[currentLook - 1]; // currentLook is 1-based
if (Math.abs(zScore) >= boundary) {
return {
stop: true,
reason: zScore > 0 ? 'variant_wins' : 'control_wins',
adjustedPValue: this.adjustPValue(currentLook, zScore)
};
}
if (currentLook === this.maxLooks) {
return {
stop: true,
reason: 'max_looks_reached',
adjustedPValue: this.adjustPValue(currentLook, zScore)
};
}
return { stop: false };
}
private calculateBoundaries(): number[] {
// O'Brien-Fleming-style boundaries: z_k ≈ z_(1-α/2) / sqrt(information fraction)
// (a common approximation; exact boundaries come from alpha-spending software)
const zAlpha = this.getZScore(1 - this.alpha / 2);
const boundaries: number[] = [];
for (let i = 1; i <= this.maxLooks; i++) {
const info = i / this.maxLooks;
boundaries.push(zAlpha / Math.sqrt(info));
}
return boundaries;
}
private getZScore(p: number): number {
// Abramowitz & Stegun 26.2.23 approximation to the inverse normal CDF (p > 0.5)
const t = Math.sqrt(-2 * Math.log(1 - p));
return t - (2.515517 + 0.802853 * t + 0.010328 * t * t) /
(1 + 1.432788 * t + 0.189269 * t * t + 0.001308 * t * t * t);
}
private adjustPValue(look: number, zScore: number): number {
// Conservative Bonferroni-style adjustment for the number of looks taken
const rawP = 2 * (1 - this.normalCDF(Math.abs(zScore)));
return Math.min(1, rawP * look);
}
private normalCDF(x: number): number {
return 0.5 * (1 + Math.tanh(Math.sqrt(2 / Math.PI) * (x + 0.044715 * Math.pow(x, 3))));
}
}
2. Multiple Comparison Problem
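Assumed shapes for the correction code below (not defined in the article):
code
interface MetricResults {
  name: string;
  pValue: number;
}
interface CorrectedResult {
  original: number;     // raw p-value
  adjusted: number;     // corrected p-value
  significant: boolean;
}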
code
// BAD: Testing many metrics without correction
function badMultipleMetrics(results: MetricResults[]) {
const winners: string[] = [];
results.forEach(metric => {
if (metric.pValue < 0.05) {
winners.push(metric.name); // High false positive rate!
}
});
return winners;
}
// GOOD: Apply Bonferroni or FDR correction
class MultipleComparisonCorrection {
/**
* Bonferroni correction (conservative)
*/
bonferroni(pValues: number[], alpha: number = 0.05): CorrectedResult[] {
const adjustedAlpha = alpha / pValues.length;
return pValues.map((p, i) => ({
original: p,
adjusted: Math.min(p * pValues.length, 1),
significant: p <= adjustedAlpha
}));
}
/**
* Benjamini-Hochberg FDR correction (less conservative)
*/
fdr(pValues: number[], alpha: number = 0.05): CorrectedResult[] {
const sorted = pValues
.map((p, i) => ({ p, i }))
.sort((a, b) => a.p - b.p);
const n = pValues.length;
const results: CorrectedResult[] = new Array(n);
let maxSignificantIndex = -1;
sorted.forEach((item, rank) => {
const threshold = (rank + 1) * alpha / n;
if (item.p <= threshold) {
maxSignificantIndex = rank;
}
});
sorted.forEach((item, rank) => {
results[item.i] = {
original: item.p,
adjusted: Math.min(item.p * n / (rank + 1), 1),
significant: rank <= maxSignificantIndex
};
});
return results;
}
}
// Usage
const correction = new MultipleComparisonCorrection();
const pValues = [0.01, 0.03, 0.04, 0.08, 0.15]; // 5 metrics tested
const bonferroni = correction.bonferroni(pValues);
// Only p=0.01 is significant (0.01 ≤ 0.05/5 = 0.01)
const fdr = correction.fdr(pValues);
// Benjamini-Hochberg is less conservative in general, but with these p-values
// only p=0.01 clears its threshold (thresholds: 0.01, 0.02, 0.03, 0.04, 0.05)
3. Segment Mining
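Assumed input and output shapes for the segment analysis below:
code
interface SegmentResults {
  segment: string;
  lift: number;        // relative lift, percent
  pValue: number;
}
type AnalysisResult = Array<{
  segment: string;
  lift: number;
  originalPValue: number;
  adjustedPValue: number;
  significant: boolean;
}>;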
code
// BAD: Looking for any segment that shows significance
function badSegmentMining(results: SegmentResults[]) {
return results.find(segment => segment.pValue < 0.05);
// Will find "winners" that are just noise
}
// GOOD: Pre-register segments and correct for multiple comparisons
class PreRegisteredSegmentAnalysis {
private preRegisteredSegments: string[];
constructor(segments: string[]) {
this.preRegisteredSegments = segments;
// Document before test starts!
this.logPreRegistration(segments);
}
analyze(results: SegmentResults[]): AnalysisResult {
// Only analyze pre-registered segments
const validResults = results.filter(r =>
this.preRegisteredSegments.includes(r.segment)
);
// Apply FDR correction
const correction = new MultipleComparisonCorrection();
const corrected = correction.fdr(
validResults.map(r => r.pValue)
);
return validResults.map((r, i) => ({
segment: r.segment,
lift: r.lift,
originalPValue: r.pValue,
adjustedPValue: corrected[i].adjusted,
significant: corrected[i].significant
}));
}
private logPreRegistration(segments: string[]) {
console.log(`Pre-registered segments (${new Date().toISOString()}):`);
segments.forEach(s => console.log(` - ${s}`));
}
}
Multivariate Testing (MVT)
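The MVT helper below works over factor, variation, and result shapes that are not declared in the article; a minimal sketch of what it assumes:
code
interface Factor {
  name: string;
  levels: string[];
}
interface Variation {
  id: string;
  factors: Record<string, string>;  // factor name -> chosen level
}
interface ConversionData {
  visitors: number;
  conversions: number;
}
interface InteractionEffect {
  matrix: Record<string, Record<string, number>>;  // level1 -> level2 -> avg rate
  significant: boolean;
}
interface EffectAnalysis {
  mainEffects: Record<string, Record<string, number>>;  // factor -> level -> avg rate
  interactions: Record<string, InteractionEffect>;      // "f1×f2" -> effect
}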
code
class MultivariateTest {
/**
* Full factorial design - tests all combinations
*/
createFullFactorialDesign(factors: Factor[]): Variation[] {
const variations: Variation[] = [];
const generate = (index: number, current: Record<string, string>) => {
if (index === factors.length) {
variations.push({
id: this.generateVariationId(current),
factors: { ...current }
});
return;
}
const factor = factors[index];
factor.levels.forEach(level => {
generate(index + 1, { ...current, [factor.name]: level });
});
};
generate(0, {});
return variations;
}
/**
* Fractional factorial for many factors
*/
createFractionalDesign(factors: Factor[], fraction: number = 0.5): Variation[] {
const full = this.createFullFactorialDesign(factors);
const sampleSize = Math.ceil(full.length * fraction);
// Use orthogonal array selection
return this.selectOrthogonal(full, sampleSize);
}
/**
* Analyze main effects and interactions
*/
analyzeEffects(
variations: Variation[],
results: Map<string, ConversionData>
): EffectAnalysis {
const effects: EffectAnalysis = {
mainEffects: {},
interactions: {}
};
// Calculate main effects
const factors = Object.keys(variations[0].factors);
factors.forEach(factor => {
const levels = [...new Set(variations.map(v => v.factors[factor]))];
levels.forEach(level => {
const withLevel = variations.filter(v => v.factors[factor] === level);
const avgRate = this.averageConversionRate(withLevel, results);
if (!effects.mainEffects[factor]) {
effects.mainEffects[factor] = {};
}
effects.mainEffects[factor][level] = avgRate;
});
});
// Calculate two-way interactions
for (let i = 0; i < factors.length; i++) {
for (let j = i + 1; j < factors.length; j++) {
const factor1 = factors[i];
const factor2 = factors[j];
const key = `${factor1}×${factor2}`;
effects.interactions[key] = this.calculateInteraction(
variations, results, factor1, factor2
);
}
}
return effects;
}
private generateVariationId(factors: Record<string, string>): string {
return Object.entries(factors)
.map(([k, v]) => `${k}:${v}`)
.join('|');
}
private selectOrthogonal(variations: Variation[], n: number): Variation[] {
// Simplified: use systematic sampling
const step = Math.floor(variations.length / n);
return variations.filter((_, i) => i % step === 0).slice(0, n);
}
private averageConversionRate(
variations: Variation[],
results: Map<string, ConversionData>
): number {
const rates = variations.map(v => {
const data = results.get(v.id);
return data ? data.conversions / data.visitors : 0;
});
return rates.reduce((a, b) => a + b, 0) / rates.length;
}
private calculateInteraction(
variations: Variation[],
results: Map<string, ConversionData>,
factor1: string,
factor2: string
): InteractionEffect {
// Simplified interaction calculation
const levels1 = [...new Set(variations.map(v => v.factors[factor1]))];
const levels2 = [...new Set(variations.map(v => v.factors[factor2]))];
const matrix: Record<string, Record<string, number>> = {};
levels1.forEach(l1 => {
matrix[l1] = {};
levels2.forEach(l2 => {
const matching = variations.filter(
v => v.factors[factor1] === l1 && v.factors[factor2] === l2
);
matrix[l1][l2] = this.averageConversionRate(matching, results);
});
});
return { matrix, significant: false }; // Add significance testing
}
}
// Example usage
const mvt = new MultivariateTest();
const factors: Factor[] = [
{ name: 'headline', levels: ['benefit', 'feature', 'question'] },
{ name: 'cta_color', levels: ['blue', 'green', 'orange'] },
{ name: 'image', levels: ['product', 'people', 'abstract'] }
];
const variations = mvt.createFullFactorialDesign(factors);
console.log(`Full factorial: ${variations.length} variations`);
// Output: Full factorial: 27 variations (3×3×3)
const fractional = mvt.createFractionalDesign(factors, 0.33);
console.log(`Fractional: ${fractional.length} variations`);
// Output: Fractional: 9 variations
Test Documentation Template
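The documentation interface below references a SegmentResult shape it never defines; a minimal assumed sketch:
code
interface SegmentResult {
  lift: number;        // relative lift, percent
  pValue: number;
  sampleSize: number;
}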
code
interface TestDocumentation {
// Metadata
testId: string;
testName: string;
owner: string;
createdDate: string;
// Hypothesis
hypothesis: {
observation: string;
change: string;
expectedOutcome: string;
rationale: string;
};
// Design
design: {
type: 'A/B' | 'A/B/n' | 'MVT';
primaryMetric: string;
secondaryMetrics: string[];
segments: string[];
trafficAllocation: number;
minimumSampleSize: number;
expectedDuration: number;
};
// Variations
variations: Array<{
id: string;
name: string;
description: string;
screenshot?: string;
}>;
// Results
results?: {
startDate: string;
endDate: string;
sampleSize: number;
winner: string;
lift: number;
confidence: number;
segmentResults?: Record<string, SegmentResult>;
};
// Learnings
learnings?: {
keyInsights: string[];
nextSteps: string[];
applicability: string;
};
}
// Example documentation
const testDoc: TestDocumentation = {
testId: 'TEST-2026-001',
testName: 'Pricing Page CTA Color Test',
owner: 'growth-team',
createdDate: '2026-01-10',
hypothesis: {
observation: 'CTA button has low contrast ratio (2.1:1) against page background',
change: 'Increase CTA button contrast by changing from light blue to dark blue',
expectedOutcome: 'Click-through rate will increase by 15%',
rationale: 'WCAG guidelines recommend 4.5:1 contrast; higher visibility drives more clicks'
},
design: {
type: 'A/B',
primaryMetric: 'CTA click-through rate',
secondaryMetrics: ['Pricing page bounce rate', 'Trial signups'],
segments: ['New vs Returning', 'Mobile vs Desktop'],
trafficAllocation: 1.0,
minimumSampleSize: 5000,
expectedDuration: 14
},
variations: [
{
id: 'control',
name: 'Light Blue CTA',
description: 'Current button: #60A5FA on white background',
screenshot: '/tests/001/control.png'
},
{
id: 'variant_a',
name: 'Dark Blue CTA',
description: 'New button: #1D4ED8 on white background (7:1 contrast)',
screenshot: '/tests/001/variant.png'
}
]
};
A/B Testing Best Practices Checklist
code
## Pre-Test Checklist
### Planning
□ Clear, falsifiable hypothesis documented
□ Primary metric defined (single source of truth)
□ Secondary metrics identified
□ Sample size calculated
□ Test duration estimated
□ Segments pre-registered
### Technical
□ Tracking implementation verified
□ Variation assignment is deterministic
□ No flickering/flash of original content
□ QA on all variations across devices
□ No interference with other tests
### Statistical
□ Power analysis completed (80%+ power)
□ Minimum detectable effect is meaningful
□ Stopping rules defined in advance
□ Multiple comparison correction planned
## During Test
□ Monitor for technical issues daily
□ Do not peek at results for decisions
□ Document any anomalies
□ Resist pressure to stop early
## Post-Test
□ Reached planned sample size
□ Statistical significance verified
□ Effect size is practically meaningful
□ Segment analysis completed
□ Results documented
□ Learnings shared with team
□ Winner implemented (or kept control)
Next: Understand your users better with User Research methodologies.