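// Autodetection heuristics for LLM providers and models: which chat template to
// apply, which edit prompt to use, and whether a model supports images, reasoning,
// next-edit, or parallel generation, when these are not configured explicitly.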
import {
  ChatMessage,
  ModelCapability,
  ModelDescription,
  TemplateType,
} from "../index.js";
import { NEXT_EDIT_MODELS } from "./constants.js";

import {
  anthropicTemplateMessages,
  chatmlTemplateMessages,
  codeLlama70bTemplateMessages,
  codestralTemplateMessages,
  deepseekTemplateMessages,
  gemmaTemplateMessage,
  graniteTemplateMessages,
  llama2TemplateMessages,
  llama3TemplateMessages,
  llavaTemplateMessages,
  neuralChatTemplateMessages,
  openchatTemplateMessages,
  phi2TemplateMessages,
  phindTemplateMessages,
  templateAlpacaMessages,
  xWinCoderTemplateMessages,
  zephyrTemplateMessages,
} from "./templates/chat.js";
import {
  alpacaEditPrompt,
  claudeEditPrompt,
  codeLlama70bEditPrompt,
  deepseekEditPrompt,
  gemmaEditPrompt,
  gptEditPrompt,
  llama3EditPrompt,
  mistralEditPrompt,
  neuralChatEditPrompt,
  openchatEditPrompt,
  osModelsEditPrompt,
  phindEditPrompt,
  simplifiedEditPrompt,
  xWinCoderEditPrompt,
  zephyrEditPrompt,
} from "./templates/edit.js";
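
// Providers that apply chat templating themselves (typically server-side), so no
// local TemplateType needs to be autodetected for them.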
const PROVIDER_HANDLES_TEMPLATING: string[] = [
  "lmstudio",
  "lemonade",
  "openai",
  "nvidia",
  "ollama",
  "together",
  "novita",
  "msty",
  "anthropic",
  "bedrock",
  "cohere",
  "sagemaker",
  "continue-proxy",
  "mistral",
  "mimo",
  "sambanova",
  "vertexai",
  "watsonx",
  "nebius",
  "relace",
  "openrouter",
  "deepseek",
  "xAI",
  "groq",
  "gemini",
  "docker",
  // TODO: add the providers below, or better, invert the logic so that only the
  // providers that need local templating are hardcoded.
  // Asksage.ts
  // Azure.ts
  // BedrockImport.ts
  // Cerebras.ts
  // Cloudflare.ts
  // CometAPI.ts
  // CustomLLM.ts
  // DeepInfra.ts
  // Fireworks.ts
  // Flowise.ts
  // FunctionNetwork.ts
  // HuggingFaceInferenceAPI.ts
  // HuggingFaceTEI.ts
  // HuggingFaceTGI.ts
  // Inception.ts
  // Kindo.ts
  // LlamaCpp.ts
  // LlamaStack.ts
  // Llamafile.ts
  // Mock.ts
  // Moonshot.ts
  // NCompass.ts
  // OVHcloud.ts
  // Replicate.ts
  // Scaleway.ts
  // SiliconFlow.ts
  // TARS.ts
  // Test.ts
  // TextGenWebUI.ts
  // TransformersJsEmbeddingsProvider.ts
  // Venice.ts
  // Vllm.ts
  // Voyage.ts
  // etc.
];
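
// Providers that can accept image inputs. A model must additionally match
// MODEL_SUPPORTS_IMAGES below (or contain "vision"), unless capabilities override.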
const PROVIDER_SUPPORTS_IMAGES: string[] = [
  "openai",
  "ollama",
  "lemonade",
  "cohere",
  "gemini",
  "msty",
  "anthropic",
  "bedrock",
  "sagemaker",
  "continue-proxy",
  "openrouter",
  "venice",
  "sambanova",
  "vertexai",
  "azure",
  "scaleway",
  "nebius",
  "ovhcloud",
  "watsonx",
];
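
// Tested against the lowercased model name and (if present) the lowercased title.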
const MODEL_SUPPORTS_IMAGES: RegExp[] = [
  /llava/,
  /gpt-4-turbo/,
  /gpt-4o/,
  /gpt-4o-mini/,
  /claude-3/,
  /gemini-ultra/,
  /gemini-1\.5-pro/,
  /gemini-1\.5-flash/,
  /sonnet/,
  /opus/,
  /haiku/,
  /pixtral/,
  /llama-?3\.2/,
  /llama-?4/, // might use something like /llama-?(?:[4-9](?:\.\d+)?|\d{2,}(?:\.\d+)?)/ for forward compat, if needed
  /\bgemma-?3(?!n)/, // gemma3 supports vision, but gemma3n doesn't!
  /\b(pali|med)gemma/,
  /qwen(.*)vl/,
  /mistral-small/,
  /mistral-medium/,
];
function modelSupportsImages(
  provider: string,
  model: string,
  title: string | undefined,
  capabilities: ModelCapability | undefined,
): boolean {
  if (capabilities?.uploadImage !== undefined) {
    return capabilities.uploadImage;
  }
  if (!PROVIDER_SUPPORTS_IMAGES.includes(provider)) {
    return false;
  }

  const lowerModel = model.toLowerCase();
  const lowerTitle = title?.toLowerCase() ?? "";

  if (
    lowerModel.includes("vision") ||
    lowerTitle.includes("vision") ||
    MODEL_SUPPORTS_IMAGES.some(
      (modelRx) => modelRx.test(lowerModel) || modelRx.test(lowerTitle),
    )
  ) {
    return true;
  }

  return false;
}
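
// Illustrative behavior (hypothetical inputs):
//   modelSupportsImages("openai", "gpt-4o", undefined, undefined) returns true.
//   modelSupportsImages("groq", "llama-3.2-90b-vision", undefined, undefined) returns
//   false: "groq" is not in PROVIDER_SUPPORTS_IMAGES, so the model regexes never run.
//   An explicit capabilities.uploadImage value short-circuits both checks.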
function modelSupportsReasoning(
  model: ModelDescription | null | undefined,
): boolean {
  if (!model) {
    return false;
  }
  if (model.completionOptions?.reasoning !== undefined) {
    // Reasoning support is forced at the config level, though the model itself
    // might not actually support it.
    return model.completionOptions.reasoning;
  }
  // Our current way of disabling reasoning does not seem to work for grok-code,
  // so enabling it here would result in a useless lightbulb.
  // if (model.model.includes("grok-code")) {
  //   return true;
  // }
  // Do not turn reasoning on by default for Claude 3.x models.
  if (
    model.model.includes("claude") &&
    !model.model.includes("-3-") &&
    !model.model.includes("-3.5-")
  ) {
    return true;
  }
  if (model.model.includes("command-a-reasoning")) {
    return true;
  }
  if (model.model.includes("deepseek-r")) {
    return true;
  }

  return false;
}
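
// Illustrative results for hypothetical model.model values:
//   "claude-sonnet-4"   returns true  (Claude, but not a 3.x model)
//   "claude-3-5-sonnet" returns false (contains "-3-")
//   "deepseek-r1"       returns true  (matches "deepseek-r")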
const PARALLEL_PROVIDERS: string[] = [
  "anthropic",
  "bedrock",
  "cohere",
  "sagemaker",
  "deepinfra",
  "gemini",
  "huggingface-inference-api",
  "huggingface-tgi",
  "mistral",
  "moonshot",
  "replicate",
  "together",
  "novita",
  "sambanova",
  "ovhcloud",
  "nebius",
  "vertexai",
  "function-network",
  "scaleway",
];

function llmCanGenerateInParallel(provider: string, model: string): boolean {
  if (provider === "openai") {
    return model.includes("gpt");
  }

  return PARALLEL_PROVIDERS.includes(provider);
}
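
// e.g. llmCanGenerateInParallel("openai", "gpt-4o") is true, while
// llmCanGenerateInParallel("openai", "o3") is false; for any provider in
// PARALLEL_PROVIDERS the model name is not consulted at all.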
function isProviderHandlesTemplatingOrNoTemplateTypeRequired(
  modelName: string,
): boolean {
  return (
    modelName.includes("gpt") ||
    modelName.includes("command") ||
    modelName.includes("aya") ||
    modelName.includes("chat-bison") ||
    modelName.includes("pplx") ||
    modelName.includes("gemini") ||
    modelName.includes("grok") ||
    modelName.includes("moonshot") ||
    modelName.includes("kimi") ||
    modelName.includes("mercury") ||
    /^o\d/.test(modelName)
  );
}
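
// The /^o\d/ case covers OpenAI reasoning-model names such as "o1" and "o3".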
// NOTE: When updating this list,
// update core/nextEdit/templating/NextEditPromptEngine.ts as well.
const MODEL_SUPPORTS_NEXT_EDIT: string[] = [
  NEXT_EDIT_MODELS.MERCURY_CODER,
  NEXT_EDIT_MODELS.INSTINCT,
];

function modelSupportsNextEdit(
  capabilities: ModelCapability | undefined,
  model: string,
  title: string | undefined,
): boolean {
  if (capabilities?.nextEdit !== undefined) {
    return capabilities.nextEdit;
  }

  // Lowercase both the model and the title so the match is case-insensitive,
  // consistent with modelSupportsImages above.
  const lower = model.toLowerCase();
  const lowerTitle = title?.toLowerCase();
  if (
    MODEL_SUPPORTS_NEXT_EDIT.some(
      (modelName) =>
        lower.includes(modelName) || lowerTitle?.includes(modelName),
    )
  ) {
    return true;
  }

  return false;
}
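
// An explicit capabilities.nextEdit flag always wins; otherwise the model name or
// title only needs to contain one of the MODEL_SUPPORTS_NEXT_EDIT identifiers.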
function autodetectTemplateType(model: string): TemplateType | undefined {
  const lower = model.toLowerCase();

  if (lower.includes("codellama") && lower.includes("70b")) {
    return "codellama-70b";
  }

  if (isProviderHandlesTemplatingOrNoTemplateTypeRequired(lower)) {
    return undefined;
  }

  if (lower.includes("llama3") || lower.includes("llama-3")) {
    return "llama3";
  }

  if (lower.includes("llava")) {
    return "llava";
  }

  if (lower.includes("tinyllama")) {
    return "zephyr";
  }

  if (lower.includes("xwin")) {
    return "xwin-coder";
  }

  if (lower.includes("dolphin")) {
    return "chatml";
  }

  if (lower.includes("gemma")) {
    return "gemma";
  }

  if (lower.includes("phi2")) {
    return "phi2";
  }

  if (lower.includes("phind")) {
    return "phind";
  }

  if (lower.includes("llama")) {
    return "llama2";
  }

  if (lower.includes("zephyr")) {
    return "zephyr";
  }

  // Claude requests are always sent through the Messages API, so no formatting is necessary.
  if (lower.includes("claude")) {
    return "none";
  }

  // Nova Pro requests are always sent through the Converse API, so no formatting is necessary.
  if (lower.includes("nova")) {
    return "none";
  }

  if (lower.includes("codestral")) {
    return "none";
  }

  if (lower.includes("alpaca") || lower.includes("wizard")) {
    return "alpaca";
  }

  if (lower.includes("mistral") || lower.includes("mixtral")) {
    return "llama2";
  }

  if (lower.includes("deepseek")) {
    return "deepseek";
  }

  if (lower.includes("ninja") || lower.includes("openchat")) {
    return "openchat";
  }

  if (lower.includes("neural-chat")) {
    return "neural-chat";
  }

  if (lower.includes("granite")) {
    return "granite";
  }

  return "chatml";
}
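
// Illustrative results (hypothetical model names):
//   autodetectTemplateType("codellama-70b-instruct") returns "codellama-70b"
//   autodetectTemplateType("gpt-4o") returns undefined (no local template required)
//   autodetectTemplateType("mistral-7b-instruct") returns "llama2"
//   autodetectTemplateType("some-unknown-model") returns "chatml" (the fallback)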
function autodetectTemplateFunction(
  model: string,
  provider: string,
  explicitTemplate: TemplateType | undefined = undefined,
) {
  if (
    explicitTemplate === undefined &&
    PROVIDER_HANDLES_TEMPLATING.includes(provider)
  ) {
    return null;
  }

  const templateType = explicitTemplate ?? autodetectTemplateType(model);

  if (templateType) {
    const mapping: Record<
      TemplateType,
      null | ((msg: ChatMessage[]) => string)
    > = {
      llama2: llama2TemplateMessages,
      alpaca: templateAlpacaMessages,
      phi2: phi2TemplateMessages,
      phind: phindTemplateMessages,
      zephyr: zephyrTemplateMessages,
      anthropic: anthropicTemplateMessages,
      chatml: chatmlTemplateMessages,
      deepseek: deepseekTemplateMessages,
      openchat: openchatTemplateMessages,
      "xwin-coder": xWinCoderTemplateMessages,
      "neural-chat": neuralChatTemplateMessages,
      llava: llavaTemplateMessages,
      "codellama-70b": codeLlama70bTemplateMessages,
      gemma: gemmaTemplateMessage,
      granite: graniteTemplateMessages,
      llama3: llama3TemplateMessages,
      codestral: codestralTemplateMessages,
      none: null,
    };

    return mapping[templateType];
  }

  return null;
}
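
// Returns null when the provider templates messages itself and no explicit template
// is forced. Illustrative call (with a hypothetical provider id that is not in
// PROVIDER_HANDLES_TEMPLATING):
//   autodetectTemplateFunction("zephyr-7b-beta", "llamacpp") resolves to
//   zephyrTemplateMessages.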
const USES_OS_MODELS_EDIT_PROMPT: TemplateType[] = [
  "alpaca",
  "chatml",
  // "codellama-70b" is deliberately omitted: it doesn't respond well to this prompt.
  "deepseek",
  "gemma",
  "llama2",
  "llava",
  "neural-chat",
  "openchat",
  "phi2",
  "phind",
  "xwin-coder",
  "zephyr",
  "llama3",
];

function autodetectPromptTemplates(
  model: string,
  explicitTemplate: TemplateType | undefined = undefined,
) {
  const templateType = explicitTemplate ?? autodetectTemplateType(model);
  const templates: Record<string, any> = {};

  let editTemplate = null;

  if (templateType && USES_OS_MODELS_EDIT_PROMPT.includes(templateType)) {
    // This overrides basically everything else. We will probably delete the
    // branches below later, but keeping them for now makes this easy to revert.
    editTemplate = osModelsEditPrompt;
  } else if (templateType === "phind") {
    editTemplate = phindEditPrompt;
  } else if (templateType === "phi2") {
    editTemplate = simplifiedEditPrompt;
  } else if (templateType === "zephyr") {
    editTemplate = zephyrEditPrompt;
  } else if (templateType === "llama2") {
    if (model.includes("mistral")) {
      editTemplate = mistralEditPrompt;
    } else {
      editTemplate = osModelsEditPrompt;
    }
  } else if (templateType === "alpaca") {
    editTemplate = alpacaEditPrompt;
  } else if (templateType === "deepseek") {
    editTemplate = deepseekEditPrompt;
  } else if (templateType === "openchat") {
    editTemplate = openchatEditPrompt;
  } else if (templateType === "xwin-coder") {
    editTemplate = xWinCoderEditPrompt;
  } else if (templateType === "neural-chat") {
    editTemplate = neuralChatEditPrompt;
  } else if (templateType === "codellama-70b") {
    editTemplate = codeLlama70bEditPrompt;
  } else if (templateType === "anthropic") {
    editTemplate = claudeEditPrompt;
  } else if (templateType === "gemma") {
    editTemplate = gemmaEditPrompt;
  } else if (templateType === "llama3") {
    editTemplate = llama3EditPrompt;
  } else if (templateType === "none") {
    editTemplate = null;
  } else if (templateType) {
    editTemplate = gptEditPrompt;
  } else if (model.includes("codestral")) {
    editTemplate = osModelsEditPrompt;
  }

  if (editTemplate !== null) {
    templates.edit = editTemplate;
  }

  return templates;
}
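
// Illustrative: autodetectPromptTemplates("deepseek-coder-6.7b") yields
// { edit: osModelsEditPrompt }, since "deepseek" is in USES_OS_MODELS_EDIT_PROMPT
// and that branch takes precedence over deepseekEditPrompt.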

export {
  autodetectPromptTemplates,
  autodetectTemplateFunction,
  autodetectTemplateType,
  llmCanGenerateInParallel,
  modelSupportsImages,
  modelSupportsNextEdit,
  modelSupportsReasoning,
};