diff --git a/apps/web-roo-code/src/lib/__tests__/eval-outcomes.test.ts b/apps/web-roo-code/src/lib/__tests__/eval-outcomes.test.ts
new file mode 100644
index 00000000000..63380b2c22c
--- /dev/null
+++ b/apps/web-roo-code/src/lib/__tests__/eval-outcomes.test.ts
@@ -0,0 +1,81 @@
+// Sanity checks for the exported EVAL_OUTCOMES list and its lookup helpers.
+import { describe, it, expect } from "vitest"
+import { EVAL_OUTCOMES, isEvalOutcomeId, getEvalOutcomeBySlug } from "../eval-outcomes"
+
+// Structural invariants that every entry in the list must satisfy.
+describe("EVAL_OUTCOMES", () => {
+	it("contains at least one outcome", () => {
+		expect(EVAL_OUTCOMES.length).toBeGreaterThan(0)
+	})
+
+	it("every outcome has a unique id", () => {
+		const ids = EVAL_OUTCOMES.map((o) => o.id)
+		// A Set drops duplicates, so equal sizes means no id is repeated.
+		expect(new Set(ids).size).toBe(ids.length)
+	})
+
+	it("every outcome has a unique slug", () => {
+		const slugs = EVAL_OUTCOMES.map((o) => o.slug)
+		expect(new Set(slugs).size).toBe(slugs.length)
+	})
+
+	it("every outcome has required fields", () => {
+		for (const outcome of EVAL_OUTCOMES) {
+			expect(outcome.id).toBeTruthy()
+			expect(outcome.slug).toBeTruthy()
+			expect(outcome.name).toBeTruthy()
+			expect(outcome.description).toBeTruthy()
+			expect(outcome.icon).toBeDefined()
+			expect(outcome.recommendedRoleIds.length).toBeGreaterThan(0)
+			expect(outcome.whyItWorks.length).toBeGreaterThan(0)
+		}
+	})
+
+	it("builderProfile, when present, has required fields", () => {
+		const withProfiles = EVAL_OUTCOMES.filter((o) => o.builderProfile)
+		// Guards against the loop below passing vacuously.
+		expect(withProfiles.length).toBeGreaterThan(0)
+
+		for (const outcome of withProfiles) {
+			// Non-null is safe: the filter above kept only entries with a profile.
+			const profile = outcome.builderProfile!
+			expect(profile.title).toBeTruthy()
+			expect(profile.description).toBeTruthy()
+			expect(profile.capabilities.length).toBeGreaterThan(0)
+			expect(profile.howItWorks.length).toBeGreaterThan(0)
+		}
+	})
+})
+
+// isEvalOutcomeId must accept every listed id and reject unknown strings.
+describe("isEvalOutcomeId", () => {
+	it("returns true for valid outcome ids", () => {
+		for (const outcome of EVAL_OUTCOMES) {
+			expect(isEvalOutcomeId(outcome.id)).toBe(true)
+		}
+	})
+
+	it("returns false for invalid ids", () => {
+		expect(isEvalOutcomeId("nonexistent")).toBe(false)
+		expect(isEvalOutcomeId("")).toBe(false)
+		// Upper-cased input: the check is expected to be case-sensitive.
+		expect(isEvalOutcomeId("PROTOTYPE_TO_PR")).toBe(false)
+	})
+})
+
+// getEvalOutcomeBySlug must round-trip every listed slug.
+describe("getEvalOutcomeBySlug", () => {
+	it("returns the correct outcome for valid slugs", () => {
+		for (const outcome of EVAL_OUTCOMES) {
+			const result = getEvalOutcomeBySlug(outcome.slug)
+			expect(result).toBeDefined()
+			expect(result!.id).toBe(outcome.id)
+			expect(result!.slug).toBe(outcome.slug)
+		}
+	})
+
+	it("returns undefined for invalid slugs", () => {
+		expect(getEvalOutcomeBySlug("nonexistent")).toBeUndefined()
+		expect(getEvalOutcomeBySlug("")).toBeUndefined()
+	})
+})
diff --git a/apps/web-roo-code/src/lib/__tests__/mock-recommendations.test.ts b/apps/web-roo-code/src/lib/__tests__/mock-recommendations.test.ts
new file mode 100644
index 00000000000..4ce221fb136
--- /dev/null
+++ b/apps/web-roo-code/src/lib/__tests__/mock-recommendations.test.ts
@@ -0,0 +1,158 @@
+// Smoke tests for the mock recommendation data and its accessor helpers.
+import { describe, it, expect } from "vitest"
+import {
+	getEngineerRoles,
+	getEngineerRole,
+	getRoleRecommendation,
+	getAllRecommendations,
+	getCloudSetupUrl,
+	TASKS_PER_DAY,
+	MODEL_TIMELINE,
+	type ModelCandidate,
+} from "../mock-recommendations"
+
+// The role list must be non-empty, fully populated, and free of duplicate ids.
+describe("getEngineerRoles", () => {
+	it("returns a non-empty array of roles", () => {
+		const roles = getEngineerRoles()
+		expect(roles.length).toBeGreaterThan(0)
+	})
+
+	it("every role has required fields", () => {
+		for (const role of getEngineerRoles()) {
+			expect(role.id).toBeTruthy()
+			expect(role.name).toBeTruthy()
+			expect(role.description).toBeTruthy()
+			expect(role.bestFor.length).toBeGreaterThan(0)
+			expect(role.strengths.length).toBeGreaterThan(0)
+		}
+	})
+
+	it("every role has a unique id", () => {
+		const roles = getEngineerRoles()
+		const ids = roles.map((r) => r.id)
+		expect(new Set(ids).size).toBe(ids.length)
+	})
+})
+
+// Lookup by id must round-trip every role and return undefined for unknown ids.
+describe("getEngineerRole", () => {
+	it("returns the correct role for a valid id", () => {
+		const roles = getEngineerRoles()
+		for (const role of roles) {
+			const result = getEngineerRole(role.id)
+			expect(result).toBeDefined()
+			expect(result!.id).toBe(role.id)
+		}
+	})
+
+	it("returns undefined for an invalid id", () => {
+		expect(getEngineerRole("nonexistent")).toBeUndefined()
+		expect(getEngineerRole("")).toBeUndefined()
+	})
+})
+
+// Every known role must have a recommendation with at least one candidate.
+describe("getRoleRecommendation", () => {
+	it("returns a recommendation for each known role", () => {
+		const roles = getEngineerRoles()
+		for (const role of roles) {
+			const rec = getRoleRecommendation(role.id)
+			expect(rec).toBeDefined()
+			expect(rec!.roleId).toBe(role.id)
+			expect(rec!.best.length).toBeGreaterThan(0)
+			expect(rec!.allCandidates.length).toBeGreaterThan(0)
+		}
+	})
+
+	it("returns undefined for an unknown role", () => {
+		expect(getRoleRecommendation("nonexistent")).toBeUndefined()
+	})
+})
+
+// Aggregate view: one recommendation per role, all reporting the same totals.
+describe("getAllRecommendations", () => {
+	it("returns one recommendation per role", () => {
+		const roles = getEngineerRoles()
+		const recs = getAllRecommendations()
+		expect(recs.length).toBe(roles.length)
+	})
+
+	it("every recommendation has consistent totalEvalRuns and totalExercises", () => {
+		const recs = getAllRecommendations()
+		// Without this guard an empty list would skip the loop and pass vacuously.
+		expect(recs.length).toBeGreaterThan(0)
+
+		// All roles share the same pool of eval runs/exercises
+		const firstRuns = recs[0]?.totalEvalRuns
+		const firstExercises = recs[0]?.totalExercises
+		for (const rec of recs) {
+			expect(rec.totalEvalRuns).toBe(firstRuns)
+			expect(rec.totalExercises).toBe(firstExercises)
+		}
+	})
+})
+
+// The sign-up link must parse as a URL and carry the candidate's model details.
+describe("getCloudSetupUrl", () => {
+	it("generates a valid URL with model parameters", () => {
+		const candidate: ModelCandidate = {
+			provider: "anthropic",
+			modelId: "claude-opus-4-6",
+			displayName: "Claude Opus 4.6",
+			compositeScore: 90,
+			tier: "best",
+			tags: [],
+			successRate: 85,
+			avgCostPerTask: 1.25,
+			estimatedDailyCost: 100,
+			avgTimePerTask: 180,
+			languageScores: { go: 80, java: 85, javascript: 90, python: 92, rust: 75 },
+			settings: { temperature: 0 },
+		}
+
+		const url = getCloudSetupUrl(candidate)
+		// The test name promises a valid URL, so make sure it actually parses.
+		expect(() => new URL(url)).not.toThrow()
+		expect(url).toContain("https://app.roocode.com/sign-up")
+		expect(url).toContain("claude-opus-4-6")
+		expect(url).toContain("anthropic")
+		// The URL is encoded via URLSearchParams, so = becomes %3D
+		expect(url).toContain("temperature")
+		// NOTE(review): "0" matches any zero in the URL, so this check is weak;
+		// consider asserting the encoded key/value pair once its exact format is confirmed.
+		expect(url).toContain("0")
+	})
+})
+
+// Basic sanity check on the exported constant.
+describe("TASKS_PER_DAY", () => {
+	it("is a positive number", () => {
+		expect(TASKS_PER_DAY).toBeGreaterThan(0)
+	})
+})
+
+// Timeline entries must be fully populated and ordered by release date.
+describe("MODEL_TIMELINE", () => {
+	it("is a non-empty array", () => {
+		expect(MODEL_TIMELINE.length).toBeGreaterThan(0)
+	})
+
+	it("entries have required fields", () => {
+		for (const entry of MODEL_TIMELINE) {
+			expect(entry.modelName).toBeTruthy()
+			expect(entry.provider).toBeTruthy()
+			expect(entry.releaseDate).toBeTruthy()
+			expect(entry.score).toBeGreaterThan(0)
+			expect(entry.costPerRun).toBeGreaterThan(0)
+		}
+	})
+
+	it("entries are in chronological order", () => {
+		for (let i = 1; i < MODEL_TIMELINE.length; i++) {
+			// NOTE(review): if releaseDate is a string, >= compares lexicographically,
+			// which assumes a sortable format such as ISO 8601 - confirm.
+			expect(MODEL_TIMELINE[i]!.releaseDate >= MODEL_TIMELINE[i - 1]!.releaseDate).toBe(true)
+		}
+	})
+})
diff --git a/apps/web-roo-code/src/lib/__tests__/objective-default-models-v1.test.ts b/apps/web-roo-code/src/lib/__tests__/objective-default-models-v1.test.ts
new file mode 100644
index 00000000000..4166568e106
--- /dev/null
+++ b/apps/web-roo-code/src/lib/__tests__/objective-default-models-v1.test.ts
@@ -0,0 +1,78 @@
+// Invariants for pickObjectiveDefaultModelV1 across every outcome and selection mode.
+import { describe, it, expect } from "vitest"
+import { pickObjectiveDefaultModelV1 } from "../objective-default-models-v1"
+import type { EvalOutcomeId } from "../eval-outcomes"
+
+// NOTE(review): hard-coded list; presumably mirrors the ids in EVAL_OUTCOMES - confirm and keep in sync.
+const ALL_OUTCOME_IDS: EvalOutcomeId[] = [
+	"prototype_to_pr",
+	"paper_cuts",
+	"sentry_triage",
+	"repro_to_fix",
+	"review_guardrails",
+	"issue_to_pr",
+]
+
+// The three selection modes exercised below.
+const ALL_MODES = ["best", "fastest", "cost"] as const
+
+// Fraction of the best model's score that the "fastest" and "cost" picks must still reach.
+const QUALITY_FLOOR_RATIO = 0.85
+
+describe("pickObjectiveDefaultModelV1", () => {
+	it("returns a non-null result for every outcome and mode combination", () => {
+		for (const outcomeId of ALL_OUTCOME_IDS) {
+			for (const mode of ALL_MODES) {
+				const result = pickObjectiveDefaultModelV1(outcomeId, mode)
+				expect(result).not.toBeNull()
+				expect(result!.modelId).toBeTruthy()
+				expect(result!.weighted).toBeDefined()
+				expect(result!.weighted.score).toBeGreaterThan(0)
+				expect(result!.weighted.costUsd).toBeGreaterThanOrEqual(0)
+				expect(result!.weighted.runtimeS).toBeGreaterThan(0)
+			}
+		}
+	})
+
+	it("best mode picks the highest-scoring model", () => {
+		for (const outcomeId of ALL_OUTCOME_IDS) {
+			const best = pickObjectiveDefaultModelV1(outcomeId, "best")
+			const fastest = pickObjectiveDefaultModelV1(outcomeId, "fastest")
+			const cheapest = pickObjectiveDefaultModelV1(outcomeId, "cost")
+
+			// The best-quality model should have a score >= any other mode's pick
+			expect(best!.weighted.score).toBeGreaterThanOrEqual(fastest!.weighted.score)
+			expect(best!.weighted.score).toBeGreaterThanOrEqual(cheapest!.weighted.score)
+		}
+	})
+
+	it("fastest mode picks a model with lower or equal runtime than best", () => {
+		for (const outcomeId of ALL_OUTCOME_IDS) {
+			const best = pickObjectiveDefaultModelV1(outcomeId, "best")
+			const fastest = pickObjectiveDefaultModelV1(outcomeId, "fastest")
+
+			expect(fastest!.weighted.runtimeS).toBeLessThanOrEqual(best!.weighted.runtimeS)
+		}
+	})
+
+	it("cost mode picks a model with lower or equal cost than best", () => {
+		for (const outcomeId of ALL_OUTCOME_IDS) {
+			const best = pickObjectiveDefaultModelV1(outcomeId, "best")
+			const cheapest = pickObjectiveDefaultModelV1(outcomeId, "cost")
+
+			expect(cheapest!.weighted.costUsd).toBeLessThanOrEqual(best!.weighted.costUsd)
+		}
+	})
+
+	it("speed/cost picks stay within 85% quality floor of the best model", () => {
+		for (const outcomeId of ALL_OUTCOME_IDS) {
+			const best = pickObjectiveDefaultModelV1(outcomeId, "best")
+			const fastest = pickObjectiveDefaultModelV1(outcomeId, "fastest")
+			const cheapest = pickObjectiveDefaultModelV1(outcomeId, "cost")
+			const qualityFloor = best!.weighted.score * QUALITY_FLOOR_RATIO
+
+			expect(fastest!.weighted.score).toBeGreaterThanOrEqual(qualityFloor)
+			expect(cheapest!.weighted.score).toBeGreaterThanOrEqual(qualityFloor)
+		}
+	})
+})