// continue/core/llm/autodetect.ts
import {
  ChatMessage,
  ModelCapability,
  ModelDescription,
  TemplateType,
} from "../index.js";
import { NEXT_EDIT_MODELS } from "./constants.js";
import {
  anthropicTemplateMessages,
  chatmlTemplateMessages,
  codeLlama70bTemplateMessages,
  codestralTemplateMessages,
  deepseekTemplateMessages,
  gemmaTemplateMessage,
  graniteTemplateMessages,
  llama2TemplateMessages,
  llama3TemplateMessages,
  llavaTemplateMessages,
  neuralChatTemplateMessages,
  openchatTemplateMessages,
  phi2TemplateMessages,
  phindTemplateMessages,
  templateAlpacaMessages,
  xWinCoderTemplateMessages,
  zephyrTemplateMessages,
} from "./templates/chat.js";
import {
  alpacaEditPrompt,
  claudeEditPrompt,
  codeLlama70bEditPrompt,
  deepseekEditPrompt,
  gemmaEditPrompt,
  gptEditPrompt,
  llama3EditPrompt,
  mistralEditPrompt,
  neuralChatEditPrompt,
  openchatEditPrompt,
  osModelsEditPrompt,
  phindEditPrompt,
  simplifiedEditPrompt,
  xWinCoderEditPrompt,
  zephyrEditPrompt,
} from "./templates/edit.js";
const PROVIDER_HANDLES_TEMPLATING: string[] = [
  "lmstudio",
  "lemonade",
  "openai",
  "nvidia",
  "ollama",
  "together",
  "novita",
  "msty",
  "anthropic",
  "bedrock",
  "cohere",
  "sagemaker",
  "continue-proxy",
  "mistral",
  "mimo",
  "sambanova",
  "vertexai",
  "watsonx",
  "nebius",
  "relace",
  "openrouter",
  "deepseek",
  "xAI",
  "groq",
  "gemini",
  "docker",
  // TODO: add the providers below, then invert this logic so that only the
  // providers that DO need local templating are hardcoded.
  // Asksage.ts
  // Azure.ts
  // BedrockImport.ts
  // Cerebras.ts
  // Cloudflare.ts
  // CometAPI.ts
  // CustomLLM.ts
  // DeepInfra.ts
  // Fireworks.ts
  // Flowise.ts
  // FunctionNetwork.ts
  // HuggingFaceInferenceAPI.ts
  // HuggingFaceTEI.ts
  // HuggingFaceTGI.ts
  // Inception.ts
  // Kindo.ts
  // LlamaCpp.ts
  // LlamaStack.ts
  // Llamafile.ts
  // Mock.ts
  // Moonshot.ts
  // NCompass.ts
  // OVHcloud.ts
  // Replicate.ts
  // Scaleway.ts
  // SiliconFlow.ts
  // TARS.ts
  // Test.ts
  // TextGenWebUI.ts
  // TransformersJsEmbeddingsProvider.ts
  // Venice.ts
  // Vllm.ts
  // Voyage.ts
  // etc.
];
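
// Providers whose APIs accept image parts in chat messages. A model must
// additionally look vision-capable (see MODEL_SUPPORTS_IMAGES below).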
const PROVIDER_SUPPORTS_IMAGES: string[] = [
  "openai",
  "ollama",
  "lemonade",
  "cohere",
  "gemini",
  "msty",
  "anthropic",
  "bedrock",
  "sagemaker",
  "continue-proxy",
  "openrouter",
  "venice",
  "sambanova",
  "vertexai",
  "azure",
  "scaleway",
  "nebius",
  "ovhcloud",
  "watsonx",
];
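
// Model-name patterns (matched against the lowercased model name or title)
// that indicate vision support.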
const MODEL_SUPPORTS_IMAGES: RegExp[] = [
  /llava/,
  /gpt-4-turbo/,
  /gpt-4o/,
  /gpt-4o-mini/,
  /claude-3/,
  /gemini-ultra/,
  /gemini-1\.5-pro/,
  /gemini-1\.5-flash/,
  /sonnet/,
  /opus/,
  /haiku/,
  /pixtral/,
  /llama-?3\.2/,
  /llama-?4/, // might use something like /llama-?(?:[4-9](?:\.\d+)?|\d{2,}(?:\.\d+)?)/ for forward compat, if needed
  /\bgemma-?3(?!n)/, // gemma3 supports vision, but gemma3n doesn't!
  /\b(pali|med)gemma/,
  /qwen(.*)vl/,
  /mistral-small/,
  /mistral-medium/,
];
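
/**
 * Whether a model can accept image input. An explicit
 * `capabilities.uploadImage` flag always wins; otherwise the provider must be
 * in PROVIDER_SUPPORTS_IMAGES and the model or title must look vision-capable.
 *
 * @example
 * // Illustrative calls; the strings are ordinary provider/model names.
 * modelSupportsImages("openai", "gpt-4o", undefined, undefined); // true
 * modelSupportsImages("groq", "llava-7b", undefined, undefined); // false (provider not listed)
 * modelSupportsImages("groq", "llava-7b", undefined, { uploadImage: true }); // true (explicit override)
 */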
function modelSupportsImages(
  provider: string,
  model: string,
  title: string | undefined,
  capabilities: ModelCapability | undefined,
): boolean {
  if (capabilities?.uploadImage !== undefined) {
    return capabilities.uploadImage;
  }
  if (!PROVIDER_SUPPORTS_IMAGES.includes(provider)) {
    return false;
  }
  const lowerModel = model.toLowerCase();
  const lowerTitle = title?.toLowerCase() ?? "";
  if (
    lowerModel.includes("vision") ||
    lowerTitle.includes("vision") ||
    MODEL_SUPPORTS_IMAGES.some(
      (modelrx) => modelrx.test(lowerModel) || modelrx.test(lowerTitle),
    )
  ) {
    return true;
  }
  return false;
}
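
/**
 * Whether reasoning UI should be enabled for a model. A config-level
 * `completionOptions.reasoning` value is an explicit override; otherwise only
 * a small set of known reasoning models defaults to true.
 *
 * @example
 * // Illustrative fragments; only the fields read by this function are shown.
 * modelSupportsReasoning({ model: "deepseek-r1" } as ModelDescription); // true
 * modelSupportsReasoning({ model: "claude-3-5-sonnet" } as ModelDescription); // false
 * modelSupportsReasoning({
 *   model: "gpt-4o",
 *   completionOptions: { reasoning: true },
 * } as ModelDescription); // true (forced via config)
 */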
function modelSupportsReasoning(
  model: ModelDescription | null | undefined,
): boolean {
  if (!model) {
    return false;
  }
  if (model.completionOptions?.reasoning !== undefined) {
    // Reasoning support is forced at the config level; the model itself might not actually support it.
    return model.completionOptions.reasoning;
  }
  // Our current way of disabling reasoning doesn't work for grok-code and
  // results in a useless lightbulb, so leave it off:
  // if (model.model.includes("grok-code")) {
  //   return true;
  // }
  // Do not turn reasoning on by default for Claude 3.x models.
  if (
    model.model.includes("claude") &&
    !model.model.includes("-3-") &&
    !model.model.includes("-3.5-")
  ) {
    return true;
  }
  if (model.model.includes("command-a-reasoning")) {
    return true;
  }
  if (model.model.includes("deepseek-r")) {
    return true;
  }
  return false;
}
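
// Providers known to tolerate multiple generation requests in flight at once.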
const PARALLEL_PROVIDERS: string[] = [
  "anthropic",
  "bedrock",
  "cohere",
  "sagemaker",
  "deepinfra",
  "gemini",
  "huggingface-inference-api",
  "huggingface-tgi",
  "mistral",
  "moonshot",
  "replicate",
  "together",
  "novita",
  "sambanova",
  "ovhcloud",
  "nebius",
  "vertexai",
  "function-network",
  "scaleway",
];
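
/**
 * Whether requests to this provider can safely be issued in parallel.
 * OpenAI is special-cased: only its "gpt" models are treated as parallel-safe.
 *
 * @example
 * llmCanGenerateInParallel("openai", "gpt-4o"); // true
 * llmCanGenerateInParallel("openai", "o1-preview"); // false ("gpt" not in the name)
 * llmCanGenerateInParallel("anthropic", "claude-3-5-sonnet"); // true (listed provider)
 */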
function llmCanGenerateInParallel(provider: string, model: string): boolean {
  if (provider === "openai") {
    return model.includes("gpt");
  }
  return PARALLEL_PROVIDERS.includes(provider);
}
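
/**
 * Model families that are only ever served behind chat APIs, so no local
 * prompt template is required. The /^o\d/ pattern matches OpenAI's
 * o1/o3-style model names.
 *
 * @example
 * isProviderHandlesTemplatingOrNoTemplateTypeRequired("gpt-4o"); // true
 * isProviderHandlesTemplatingOrNoTemplateTypeRequired("o3-mini"); // true
 * isProviderHandlesTemplatingOrNoTemplateTypeRequired("codellama-13b"); // false
 */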
function isProviderHandlesTemplatingOrNoTemplateTypeRequired(
  modelName: string,
): boolean {
  return (
    modelName.includes("gpt") ||
    modelName.includes("command") ||
    modelName.includes("aya") ||
    modelName.includes("chat-bison") ||
    modelName.includes("pplx") ||
    modelName.includes("gemini") ||
    modelName.includes("grok") ||
    modelName.includes("moonshot") ||
    modelName.includes("kimi") ||
    modelName.includes("mercury") ||
    /^o\d/.test(modelName)
  );
}
// NOTE: When updating this list,
// update core/nextEdit/templating/NextEditPromptEngine.ts as well.
const MODEL_SUPPORTS_NEXT_EDIT: string[] = [
  NEXT_EDIT_MODELS.MERCURY_CODER,
  NEXT_EDIT_MODELS.INSTINCT,
];
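
/**
 * Whether a model supports the next-edit feature. An explicit
 * `capabilities.nextEdit` flag wins; otherwise match against the known list.
 *
 * @example
 * // Assumes NEXT_EDIT_MODELS.MERCURY_CODER is the substring "mercury-coder".
 * modelSupportsNextEdit(undefined, "mercury-coder-small", undefined); // true
 * modelSupportsNextEdit({ nextEdit: false }, "mercury-coder-small", undefined); // false
 */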
function modelSupportsNextEdit(
  capabilities: ModelCapability | undefined,
  model: string,
  title: string | undefined,
): boolean {
  if (capabilities?.nextEdit !== undefined) {
    return capabilities.nextEdit;
  }
  const lower = model.toLowerCase();
  const lowerTitle = title?.toLowerCase();
  if (
    MODEL_SUPPORTS_NEXT_EDIT.some(
      (modelName) =>
        lower.includes(modelName) || lowerTitle?.includes(modelName),
    )
  ) {
    return true;
  }
  return false;
}
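
/**
 * Map a raw model name onto a chat TemplateType. Checks run roughly from most
 * to least specific; `undefined` means the provider handles templating,
 * "none" means no templating is needed, and "chatml" is the fallback for
 * unrecognized open-source models.
 *
 * @example
 * autodetectTemplateType("codellama-70b-instruct"); // "codellama-70b"
 * autodetectTemplateType("gpt-4o"); // undefined
 * autodetectTemplateType("claude-3-5-sonnet"); // "none"
 * autodetectTemplateType("totally-unknown-model"); // "chatml"
 */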
function autodetectTemplateType(model: string): TemplateType | undefined {
  const lower = model.toLowerCase();
  if (lower.includes("codellama") && lower.includes("70b")) {
    return "codellama-70b";
  }
  if (isProviderHandlesTemplatingOrNoTemplateTypeRequired(lower)) {
    return undefined;
  }
  if (lower.includes("llama3") || lower.includes("llama-3")) {
    return "llama3";
  }
  if (lower.includes("llava")) {
    return "llava";
  }
  if (lower.includes("tinyllama")) {
    return "zephyr";
  }
  if (lower.includes("xwin")) {
    return "xwin-coder";
  }
  if (lower.includes("dolphin")) {
    return "chatml";
  }
  if (lower.includes("gemma")) {
    return "gemma";
  }
  if (lower.includes("phi2")) {
    return "phi2";
  }
  if (lower.includes("phind")) {
    return "phind";
  }
  if (lower.includes("llama")) {
    return "llama2";
  }
  if (lower.includes("zephyr")) {
    return "zephyr";
  }
  // Claude requests are always sent through the Messages API, so templating is not necessary.
  if (lower.includes("claude")) {
    return "none";
  }
  // Nova Pro requests are always sent through the Converse API, so templating is not necessary.
  if (lower.includes("nova")) {
    return "none";
  }
  if (lower.includes("codestral")) {
    return "none";
  }
  if (lower.includes("alpaca") || lower.includes("wizard")) {
    return "alpaca";
  }
  if (lower.includes("mistral") || lower.includes("mixtral")) {
    return "llama2";
  }
  if (lower.includes("deepseek")) {
    return "deepseek";
  }
  if (lower.includes("ninja") || lower.includes("openchat")) {
    return "openchat";
  }
  if (lower.includes("neural-chat")) {
    return "neural-chat";
  }
  if (lower.includes("granite")) {
    return "granite";
  }
  return "chatml";
}
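
/**
 * Resolve the ChatMessage[] -> string templating function for a model, or
 * null when the provider formats prompts itself or the template type is
 * "none".
 *
 * @example
 * // Provider ids here are illustrative.
 * autodetectTemplateFunction("zephyr-7b-beta", "llama.cpp"); // zephyrTemplateMessages
 * autodetectTemplateFunction("zephyr-7b-beta", "ollama"); // null (provider templates)
 */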
function autodetectTemplateFunction(
  model: string,
  provider: string,
  explicitTemplate: TemplateType | undefined = undefined,
) {
  if (
    explicitTemplate === undefined &&
    PROVIDER_HANDLES_TEMPLATING.includes(provider)
  ) {
    return null;
  }
  const templateType = explicitTemplate ?? autodetectTemplateType(model);
  if (templateType) {
    const mapping: Record<
      TemplateType,
      null | ((msg: ChatMessage[]) => string)
    > = {
      llama2: llama2TemplateMessages,
      alpaca: templateAlpacaMessages,
      phi2: phi2TemplateMessages,
      phind: phindTemplateMessages,
      zephyr: zephyrTemplateMessages,
      anthropic: anthropicTemplateMessages,
      chatml: chatmlTemplateMessages,
      deepseek: deepseekTemplateMessages,
      openchat: openchatTemplateMessages,
      "xwin-coder": xWinCoderTemplateMessages,
      "neural-chat": neuralChatTemplateMessages,
      llava: llavaTemplateMessages,
      "codellama-70b": codeLlama70bTemplateMessages,
      gemma: gemmaTemplateMessage,
      granite: graniteTemplateMessages,
      llama3: llama3TemplateMessages,
      codestral: codestralTemplateMessages,
      none: null,
    };
    return mapping[templateType];
  }
  return null;
}
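
// Template types whose edit prompt is currently the shared osModelsEditPrompt.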
const USES_OS_MODELS_EDIT_PROMPT: TemplateType[] = [
  "alpaca",
  "chatml",
  // "codellama-70b", // doesn't respond well to this prompt
  "deepseek",
  "gemma",
  "llama2",
  "llava",
  "neural-chat",
  "openchat",
  "phi2",
  "phind",
  "xwin-coder",
  "zephyr",
  "llama3",
];
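
/**
 * Pick the edit prompt template for a model. Most open-source template types
 * currently share osModelsEditPrompt via the first branch; the remaining
 * branches are kept so the override is easy to revert.
 *
 * @example
 * autodetectPromptTemplates("deepseek-coder-6.7b"); // { edit: osModelsEditPrompt }
 * autodetectPromptTemplates("claude-3-5-sonnet"); // {} (template type "none")
 */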
function autodetectPromptTemplates(
  model: string,
  explicitTemplate: TemplateType | undefined = undefined,
) {
  const templateType = explicitTemplate ?? autodetectTemplateType(model);
  const templates: Record<string, any> = {};
  let editTemplate = null;
  if (templateType && USES_OS_MODELS_EDIT_PROMPT.includes(templateType)) {
    // This branch overrides basically everything below.
    // We will probably delete the remaining branches later, but for now
    // keeping them makes this easy to revert.
    editTemplate = osModelsEditPrompt;
  } else if (templateType === "phind") {
    editTemplate = phindEditPrompt;
  } else if (templateType === "phi2") {
    editTemplate = simplifiedEditPrompt;
  } else if (templateType === "zephyr") {
    editTemplate = zephyrEditPrompt;
  } else if (templateType === "llama2") {
    if (model.includes("mistral")) {
      editTemplate = mistralEditPrompt;
    } else {
      editTemplate = osModelsEditPrompt;
    }
  } else if (templateType === "alpaca") {
    editTemplate = alpacaEditPrompt;
  } else if (templateType === "deepseek") {
    editTemplate = deepseekEditPrompt;
  } else if (templateType === "openchat") {
    editTemplate = openchatEditPrompt;
  } else if (templateType === "xwin-coder") {
    editTemplate = xWinCoderEditPrompt;
  } else if (templateType === "neural-chat") {
    editTemplate = neuralChatEditPrompt;
  } else if (templateType === "codellama-70b") {
    editTemplate = codeLlama70bEditPrompt;
  } else if (templateType === "anthropic") {
    editTemplate = claudeEditPrompt;
  } else if (templateType === "gemma") {
    editTemplate = gemmaEditPrompt;
  } else if (templateType === "llama3") {
    editTemplate = llama3EditPrompt;
  } else if (templateType === "none") {
    editTemplate = null;
  } else if (templateType) {
    editTemplate = gptEditPrompt;
  } else if (model.includes("codestral")) {
    editTemplate = osModelsEditPrompt;
  }
  if (editTemplate !== null) {
    templates.edit = editTemplate;
  }
  return templates;
}
export {
  autodetectPromptTemplates,
  autodetectTemplateFunction,
  autodetectTemplateType,
  llmCanGenerateInParallel,
  modelSupportsImages,
  modelSupportsNextEdit,
  modelSupportsReasoning,
};