import { Mutex } from "async-mutex";
import { JSONSchema7, JSONSchema7Object } from "json-schema";
import { v4 as uuidv4 } from "uuid";

import { streamResponse } from "@continuedev/fetch";

import {
  ChatMessage,
  ChatMessageRole,
  CompletionOptions,
  LLMOptions,
  ModelInstaller,
  ThinkingChatMessage,
} from "../../index.js";
import { renderChatMessage } from "../../util/messageContent.js";
import { getRemoteModelInfo } from "../../util/ollamaHelper.js";
import { extractBase64FromDataUrl } from "../../util/url.js";
import { BaseLLM } from "../index.js";

type OllamaChatMessage = {
  role: ChatMessageRole;
  content: string;
  images?: string[] | null;
  thinking?: string;
  tool_calls?: {
    function: {
      name: string;
      arguments: JSONSchema7Object;
    };
  }[];
};
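
// Illustrative example (not exercised by this module): a user message with an
// attached image, as it would be sent to /api/chat. The base64 string is a
// placeholder, not real image data.
//
//   const exampleMessage: OllamaChatMessage = {
//     role: "user",
//     content: "What is in this picture?",
//     images: ["iVBORw0KGgoAAAANSUhEUg"],
//   };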

// See https://github.com/ollama/ollama/blob/main/docs/modelfile.md for details on each parameter
interface OllamaModelFileParams {
  mirostat?: number;
  mirostat_eta?: number;
  mirostat_tau?: number;
  num_ctx?: number;
  repeat_last_n?: number;
  repeat_penalty?: number;
  temperature?: number;
  seed?: number;
  stop?: string | string[];
  tfs_z?: number;
  num_predict?: number;
  top_k?: number;
  top_p?: number;
  min_p?: number;
  num_gpu?: number;

  // Deprecated or not directly supported here:
  num_thread?: number;
  use_mmap?: boolean;
  num_gqa?: number;
  num_keep?: number;
  typical_p?: number;
  presence_penalty?: number;
  frequency_penalty?: number;
  penalize_newline?: boolean;
  numa?: boolean;
  num_batch?: number;
  main_gpu?: number;
  low_vram?: boolean;
  vocab_only?: boolean;
  use_mlock?: boolean;
}
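
// Illustrative example: a typical options payload built from these parameters.
// The specific values are arbitrary and for documentation only.
//
//   const exampleParams: OllamaModelFileParams = {
//     temperature: 0.7,
//     num_ctx: 8192,
//     num_predict: 1024,
//     stop: ["</s>"],
//   };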

// See https://github.com/ollama/ollama/blob/main/docs/api.md
interface OllamaBaseOptions {
  model: string; // the model name
  options?: OllamaModelFileParams; // additional model parameters from the Modelfile documentation, such as temperature
  format?: "json"; // the format to return a response in; currently the only accepted value is "json"
  stream?: boolean; // if false, the response is returned as a single object rather than a stream of objects
  keep_alive?: number; // controls how long the model stays loaded in memory following the request (default: 5m)
}

interface OllamaRawOptions extends OllamaBaseOptions {
  prompt: string; // the prompt to generate a response for
  suffix?: string; // the text after the model response
  images?: string[]; // a list of base64-encoded images (for multimodal models such as llava)
  system?: string; // system message (overrides what is defined in the Modelfile)
  template?: string; // the prompt template to use (overrides what is defined in the Modelfile)
  context?: number[]; // the context parameter returned from a previous request to /generate; can be used to keep a short conversational memory
  raw?: boolean; // if true, no formatting is applied to the prompt; useful when specifying a fully templated prompt in the request
}

interface OllamaChatOptions extends OllamaBaseOptions {
  messages: OllamaChatMessage[]; // the messages of the chat; can be used to keep a chat memory
  tools?: OllamaTool[]; // tools the model may call; resulting tool calls arrive in OllamaChatMessage.tool_calls
  think?: boolean; // if true, the model is prompted to think about the response before generating it
}
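
// Illustrative example: a full (non-streaming) /api/chat request body built
// from these options. Model name and messages are arbitrary.
//
//   const exampleChatRequest: OllamaChatOptions = {
//     model: "llama3.1:8b",
//     stream: false,
//     messages: [
//       { role: "system", content: "You are a helpful assistant." },
//       { role: "user", content: "Why is the sky blue?" },
//     ],
//   };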

type OllamaBaseResponse = {
  model: string;
  created_at: string;
} & (
  | {
      done: false;
    }
  | {
      done: true;
      done_reason: string;
      total_duration: number; // time spent generating the response, in nanoseconds
      load_duration: number; // time spent loading the model, in nanoseconds
      prompt_eval_count: number; // number of tokens in the prompt
      prompt_eval_duration: number; // time spent evaluating the prompt, in nanoseconds
      eval_count: number; // number of tokens in the response
      eval_duration: number; // time spent generating the response, in nanoseconds
      context: number[]; // an encoding of the conversation used in this response; can be sent in the next request to keep conversational memory
    }
);

type OllamaErrorResponse = {
  error: string;
};

type N8nChatResponse = {
  type: string;
  content?: string;
  metadata: {
    nodeId: string;
    nodeName: string;
    itemIndex: number;
    runIndex: number;
    timestamps: number;
  };
};

type OllamaRawResponse =
  | OllamaErrorResponse
  | (OllamaBaseResponse & {
      response: string; // the generated response
    });

type OllamaChatResponse =
  | OllamaErrorResponse
  | (OllamaBaseResponse & {
      message: OllamaChatMessage;
    })
  | N8nChatResponse;

interface OllamaTool {
  type: "function";
  function: {
    name: string;
    description?: string;
    parameters?: JSONSchema7;
  };
}
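
// Illustrative example: a tool definition in the shape Ollama expects, with a
// JSON Schema for the arguments. The tool name and schema are made up.
//
//   const exampleTool: OllamaTool = {
//     type: "function",
//     function: {
//       name: "get_current_weather",
//       description: "Get the current weather for a location",
//       parameters: {
//         type: "object",
//         properties: { location: { type: "string" } },
//         required: ["location"],
//       },
//     },
//   };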

class Ollama extends BaseLLM implements ModelInstaller {
  static providerName = "ollama";
  static defaultOptions: Partial<LLMOptions> = {
    apiBase: "http://localhost:11434/",
    model: "codellama-7b",
    maxEmbeddingBatchSize: 64,
  };

  private static modelsBeingInstalled: Set<string> = new Set();
  private static modelsBeingInstalledMutex = new Mutex();

  private fimSupported: boolean = false;

  constructor(options: LLMOptions) {
    super(options);

    if (options.model === "AUTODETECT") {
      return;
    }
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };

    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }

    this.fetch(this.getEndpoint("api/show"), {
      method: "POST",
      headers: headers,
      body: JSON.stringify({ name: this._getModel() }),
    })
      .then(async (response) => {
        if (response?.status !== 200) {
          // console.warn(
          //   "Error calling Ollama /api/show endpoint: ",
          //   await response.text(),
          // );
          return;
        }
        const body = await response.json();
        if (body.parameters) {
          // Each parameter line has the form "<key> <value>"
          for (const line of body.parameters.split("\n")) {
            const parts = line.match(/^(\S+)\s+((?:".*")|\S+)$/);
            // match() returns null when the line doesn't fit the pattern
            if (!parts || parts.length < 3) {
              continue;
            }
            const key = parts[1];
            const value = parts[2];
            switch (key) {
              case "num_ctx":
                this._contextLength =
                  options.contextLength ?? Number.parseInt(value);
                break;
              case "stop":
                if (!this.completionOptions.stop) {
                  this.completionOptions.stop = [];
                }
                try {
                  this.completionOptions.stop.push(JSON.parse(value));
                } catch (e) {
                  console.warn(
                    `Error parsing stop parameter value "${value}": ${e}`,
                  );
                }
                break;
              default:
                break;
            }
          }
        }

        /**
         * There is no API to get the model's FIM capabilities, so we have to
         * make an educated guess. If a ".Suffix" variable appears in the
         * template, it's a good indication that the model supports FIM.
         */
        this.fimSupported = !!body?.template?.includes(".Suffix");
      })
      .catch((e) => {
        // console.warn("Error calling the Ollama /api/show endpoint: ", e);
      });
  }
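
  // Illustrative sketch of the ".Suffix" heuristic above: FIM-capable model
  // templates reference the suffix variable, e.g. (CodeLlama-style; exact
  // format varies by model):
  //
  //   <PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
  //
  // whereas chat-only templates never mention .Suffix.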

  // Map of Continue model names to actual Ollama model names
  private modelMap: Record<string, string> = {
    "mistral-7b": "mistral:7b",
    "mixtral-8x7b": "mixtral:8x7b",
    "llama2-7b": "llama2:7b",
    "llama2-13b": "llama2:13b",
    "codellama-7b": "codellama:7b",
    "codellama-13b": "codellama:13b",
    "codellama-34b": "codellama:34b",
    "codellama-70b": "codellama:70b",
    "llama3-8b": "llama3:8b",
    "llama3-70b": "llama3:70b",
    "llama3.1-8b": "llama3.1:8b",
    "llama3.1-70b": "llama3.1:70b",
    "llama3.1-405b": "llama3.1:405b",
    "llama3.2-1b": "llama3.2:1b",
    "llama3.2-3b": "llama3.2:3b",
    "llama3.2-11b": "llama3.2:11b",
    "llama3.2-90b": "llama3.2:90b",
    "phi-2": "phi:2.7b",
    "phind-codellama-34b": "phind-codellama:34b-v2",
    "qwen2.5-coder-0.5b": "qwen2.5-coder:0.5b",
    "qwen2.5-coder-1.5b": "qwen2.5-coder:1.5b",
    "qwen2.5-coder-3b": "qwen2.5-coder:3b",
    "qwen2.5-coder-7b": "qwen2.5-coder:7b",
    "qwen2.5-coder-14b": "qwen2.5-coder:14b",
    "qwen2.5-coder-32b": "qwen2.5-coder:32b",
    "wizardcoder-7b": "wizardcoder:7b-python",
    "wizardcoder-13b": "wizardcoder:13b-python",
    "wizardcoder-34b": "wizardcoder:34b-python",
    "zephyr-7b": "zephyr:7b",
    "codeup-13b": "codeup:13b",
    "deepseek-1b": "deepseek-coder:1.3b",
    "deepseek-7b": "deepseek-coder:6.7b",
    "deepseek-33b": "deepseek-coder:33b",
    "neural-chat-7b": "neural-chat:7b-v3.3",
    "starcoder-1b": "starcoder:1b",
    "starcoder-3b": "starcoder:3b",
    "starcoder2-3b": "starcoder2:3b",
    "stable-code-3b": "stable-code:3b",
    "granite-code-3b": "granite-code:3b",
    "granite-code-8b": "granite-code:8b",
    "granite-code-20b": "granite-code:20b",
    "granite-code-34b": "granite-code:34b",
  };

  private _getModel() {
    return this.modelMap[this.model] ?? this.model;
  }

  get contextLength() {
    const DEFAULT_OLLAMA_CONTEXT_LENGTH = 8192; // twice the default in https://github.com/ollama/ollama/blob/29ddfc2cab7f5a83a96c3133094f67b22e4f27d1/envconfig/config.go#L185
    return this._contextLength ?? DEFAULT_OLLAMA_CONTEXT_LENGTH;
  }

  private _getModelFileParams(
    options: CompletionOptions,
  ): OllamaModelFileParams {
    return {
      temperature: options.temperature,
      top_p: options.topP,
      top_k: options.topK,
      num_predict: options.maxTokens,
      stop: options.stop,
      num_ctx: this.contextLength,
      mirostat: options.mirostat,
      num_thread: options.numThreads,
      use_mmap: options.useMmap,
      min_p: options.minP,
      num_gpu: options.numGpu,
    };
  }
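
  // Illustrative example of the camelCase-to-snake_case mapping performed
  // above (values arbitrary): { temperature: 0.2, maxTokens: 256, topP: 0.9 }
  // becomes { temperature: 0.2, num_predict: 256, top_p: 0.9 }, plus num_ctx
  // from this.contextLength.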

  private _convertToOllamaMessage(message: ChatMessage): OllamaChatMessage {
    const ollamaMessage: OllamaChatMessage = {
      role: message.role,
      content: renderChatMessage(message),
    };

    if (Array.isArray(message.content)) {
      const images: string[] = [];
      message.content.forEach((part) => {
        if (part.type === "imageUrl" && part.imageUrl) {
          const image = part.imageUrl?.url
            ? extractBase64FromDataUrl(part.imageUrl.url)
            : undefined;
          if (image) {
            images.push(image);
          } else if (part.imageUrl?.url) {
            console.warn(
              "Ollama: skipping image with invalid data URL format",
              part.imageUrl.url,
            );
          }
        }
      });
      if (images.length > 0) {
        ollamaMessage.images = images;
      }
    }

    return ollamaMessage;
  }

  private _getGenerateOptions(
    options: CompletionOptions,
    prompt: string,
    suffix?: string,
  ): OllamaRawOptions {
    return {
      model: this._getModel(),
      prompt,
      suffix,
      raw: options.raw,
      options: this._getModelFileParams(options),
      keep_alive: options.keepAlive ?? 60 * 30, // 30 minutes
      stream: options.stream,
      // Not supported yet: context, images, system, template, format
    };
  }

  private getEndpoint(endpoint: string): URL {
    let base = this.apiBase;
    if (process.env.IS_BINARY) {
      // localhost can fail to resolve in the pkg binary, so use 127.0.0.1 there
      base = base?.replace("localhost", "127.0.0.1");
    }

    return new URL(endpoint, base);
  }
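
  // Note on URL resolution: with the WHATWG URL constructor used above, the
  // trailing slash on apiBase matters. For example:
  //
  //   new URL("api/chat", "http://localhost:11434/")     // => http://localhost:11434/api/chat
  //   new URL("api/chat", "http://localhost:11434/v1/")  // => http://localhost:11434/v1/api/chat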

  protected async *_streamComplete(
    prompt: string,
    signal: AbortSignal,
    options: CompletionOptions,
  ): AsyncGenerator<string> {
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };

    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    const response = await this.fetch(this.getEndpoint("api/generate"), {
      method: "POST",
      headers: headers,
      body: JSON.stringify(this._getGenerateOptions(options, prompt)),
      signal,
    });

    let buffer = "";
    for await (const value of streamResponse(response)) {
      // Append the received chunk to the buffer
      buffer += value;
      // Split the buffer into individual JSON chunks
      const chunks = buffer.split("\n");
      buffer = chunks.pop() ?? "";

      for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i];
        if (chunk.trim() !== "") {
          try {
            const j = JSON.parse(chunk) as OllamaRawResponse;
            if ("error" in j) {
              throw new Error(j.error);
            }
            j.response ??= "";
            yield j.response;
          } catch (e) {
            throw new Error(`Error parsing Ollama response: ${e} ${chunk}`);
          }
        }
      }
    }
  }
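
  // Illustrative example of the newline-delimited JSON stream parsed above.
  // Each line from /api/generate is a standalone JSON object (fields abridged,
  // timestamps omitted):
  //
  //   {"model":"codellama:7b","response":"Hel","done":false}
  //   {"model":"codellama:7b","response":"lo","done":false}
  //   {"model":"codellama:7b","response":"","done":true,"done_reason":"stop"}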

  protected async *_streamChat(
    messages: ChatMessage[],
    signal: AbortSignal,
    options: CompletionOptions,
  ): AsyncGenerator<ChatMessage> {
    const ollamaMessages = messages.map(this._convertToOllamaMessage);
    const chatOptions: OllamaChatOptions = {
      model: this._getModel(),
      messages: ollamaMessages,
      options: this._getModelFileParams(options),
      think: options.reasoning,
      keep_alive: options.keepAlive ?? 60 * 30, // 30 minutes
      stream: options.stream,
      // format: options.format, // Not currently in base completion options
    };
    // Ollama only accepts tools when the last message is from the user
    if (options.tools?.length && ollamaMessages.at(-1)?.role === "user") {
      chatOptions.tools = options.tools.map((tool) => ({
        type: "function",
        function: {
          name: tool.function.name,
          description: tool.function.description,
          parameters: tool.function.parameters,
        },
      }));
    }
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };

    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    const response = await this.fetch(this.getEndpoint("api/chat"), {
      method: "POST",
      headers: headers,
      body: JSON.stringify(chatOptions),
      signal,
    });

    let isThinking: boolean = false;

    function convertChatMessage(res: OllamaChatResponse): ChatMessage[] {
      if ("error" in res) {
        throw new Error(res.error);
      }

      // n8n-style responses carry a top-level "type" field
      if ("type" in res) {
        const { content } = res;

        if (content === "<think>") {
          isThinking = true;
        }

        if (isThinking && content) {
          // TODO better support for streaming thinking chunks, or remove this and depend on redux <think/> parsing logic
          const thinkingMessage: ThinkingChatMessage = {
            role: "thinking",
            content: content,
          };

          // could cause issues with termination if a chunk doesn't match this exactly
          if (content === "</think>") {
            isThinking = false;
          }
          // When streaming, you can't have both thinking and content
          return [thinkingMessage];
        }

        if (content) {
          const chatMessage: ChatMessage = {
            role: "assistant",
            content: content,
          };
          return [chatMessage];
        }
        return [];
      }

      const { role, content, thinking, tool_calls: toolCalls } = res.message;

      if (role === "tool") {
        throw new Error(
          "Unexpected message received from Ollama with role = tool",
        );
      }

      if (role === "assistant") {
        const thinkingMessage: ThinkingChatMessage | null = thinking
          ? { role: "thinking", content: thinking }
          : null;

        if (thinkingMessage && !content) {
          // When streaming, you can't have both thinking and content
          return [thinkingMessage];
        }
        // Either not thinking, or not streaming
        const chatMessage: ChatMessage = { role: "assistant", content };

        if (toolCalls?.length) {
          // Continue handles the response as a tool call delta, but Ollama
          // returns the full object in one response with no streaming
          chatMessage.toolCalls = toolCalls.map((tc) => ({
            type: "function",
            id: `tc_${uuidv4()}`, // generate a proper UUID with a prefix
            function: {
              name: tc.function.name,
              arguments: JSON.stringify(tc.function.arguments),
            },
          }));
        }

        // Return both thinking and chat messages if applicable
        return thinkingMessage ? [thinkingMessage, chatMessage] : [chatMessage];
      }

      // Fallback for all other roles
      return [{ role, content }];
    }

    if (chatOptions.stream === false) {
      if (response.status === 499) {
        return; // Aborted by user
      }
      const json = (await response.json()) as OllamaChatResponse;
      for (const msg of convertChatMessage(json)) {
        yield msg;
      }
    } else {
      let buffer = "";
      for await (const value of streamResponse(response)) {
        // Append the received chunk to the buffer
        buffer += value;
        // Split the buffer into individual JSON chunks
        const chunks = buffer.split("\n");
        buffer = chunks.pop() ?? "";

        for (let i = 0; i < chunks.length; i++) {
          const chunk = chunks[i];
          if (chunk.trim() !== "") {
            try {
              const j = JSON.parse(chunk) as OllamaChatResponse;
              for (const msg of convertChatMessage(j)) {
                yield msg;
              }
            } catch (e) {
              throw new Error(`Error parsing Ollama response: ${e} ${chunk}`);
            }
          }
        }
      }
    }
  }
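
  // Illustrative example: a tool call as Ollama returns it on /api/chat,
  // which convertChatMessage above maps to a Continue toolCalls entry
  // (fields abridged, tool name made up):
  //
  //   {"message":{"role":"assistant","content":"","tool_calls":[
  //     {"function":{"name":"get_current_weather","arguments":{"location":"Paris"}}}
  //   ]},"done":false}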

  supportsFim(): boolean {
    return this.fimSupported;
  }

  protected async *_streamFim(
    prefix: string,
    suffix: string,
    signal: AbortSignal,
    options: CompletionOptions,
  ): AsyncGenerator<string> {
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };

    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    const response = await this.fetch(this.getEndpoint("api/generate"), {
      method: "POST",
      headers: headers,
      body: JSON.stringify(this._getGenerateOptions(options, prefix, suffix)),
      signal,
    });

    let buffer = "";
    for await (const value of streamResponse(response)) {
      // Append the received chunk to the buffer
      buffer += value;
      // Split the buffer into individual JSON chunks
      const chunks = buffer.split("\n");
      buffer = chunks.pop() ?? "";

      for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i];
        if (chunk.trim() !== "") {
          try {
            const j = JSON.parse(chunk);
            if ("response" in j) {
              yield j.response;
            } else if ("error" in j) {
              throw new Error(j.error);
            }
          } catch (e) {
            throw new Error(`Error parsing Ollama response: ${e} ${chunk}`);
          }
        }
      }
    }
  }
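
  // Illustrative example: the /api/generate body produced for a FIM request
  // with prefix "def add(a, b):\n    " and suffix "\nprint(add(1, 2))"
  // (options object abridged):
  //
  //   { "model": "codellama:7b", "prompt": "def add(a, b):\n    ",
  //     "suffix": "\nprint(add(1, 2))", "stream": true, "options": {} }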

  async listModels(): Promise<string[]> {
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };

    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    const response = await this.fetch(
      // localhost was causing fetch failures in the pkg binary, only for this Ollama endpoint
      this.getEndpoint("api/tags"),
      {
        method: "GET",
        headers: headers,
      },
    );
    const data = await response.json();
    if (response.ok) {
      return data.models.map((model: any) => model.name);
    } else {
      throw new Error(
        "Failed to list Ollama models. Make sure Ollama is running.",
      );
    }
  }
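
  // Illustrative example of an /api/tags response consumed above (fields
  // abridged, model names and sizes arbitrary):
  //
  //   { "models": [ { "name": "llama3.1:8b", "size": 4661224676 },
  //                 { "name": "qwen2.5-coder:7b", "size": 4683087332 } ] }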

  protected async _embed(chunks: string[]): Promise<number[][]> {
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };

    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    const resp = await this.fetch(new URL("api/embed", this.apiBase), {
      method: "POST",
      body: JSON.stringify({
        model: this.model,
        input: chunks,
      }),
      headers: headers,
    });

    if (!resp.ok) {
      throw new Error(`Failed to embed chunk: ${await resp.text()}`);
    }

    const data = await resp.json();
    const embeddings: number[][] = data.embeddings;

    if (!embeddings || embeddings.length === 0) {
      throw new Error("Ollama generated empty embedding");
    }
    return embeddings;
  }
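
  // Illustrative example: /api/embed accepts a batch and returns one vector
  // per input. Model name arbitrary, vector values truncated:
  //
  //   request:  { "model": "nomic-embed-text", "input": ["hello", "world"] }
  //   response: { "embeddings": [[0.01, -0.42], [0.37, 0.08]] }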

  public async installModel(
    modelName: string,
    signal: AbortSignal,
    progressReporter?: (task: string, increment: number, total: number) => void,
  ): Promise<any> {
    const modelInfo = await getRemoteModelInfo(modelName, signal);
    if (!modelInfo) {
      throw new Error(`'${modelName}' not found in the Ollama registry!`);
    }

    const release = await Ollama.modelsBeingInstalledMutex.acquire();
    try {
      if (Ollama.modelsBeingInstalled.has(modelName)) {
        throw new Error(`Model '${modelName}' is already being installed.`);
      }
      Ollama.modelsBeingInstalled.add(modelName);
    } finally {
      release();
    }

    try {
      const response = await fetch(this.getEndpoint("api/pull"), {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify({ name: modelName }),
        signal,
      });

      const reader = response.body?.getReader();
      // TODO: generate proper progress based on modelInfo size
      let buffer = "";
      while (true) {
        const { done, value } = (await reader?.read()) || {
          done: true,
          value: undefined,
        };
        if (done) {
          break;
        }

        // Buffer the decoded text so JSON lines split across network chunks
        // are not parsed prematurely
        buffer += new TextDecoder().decode(value);
        const lines = buffer.split("\n");
        buffer = lines.pop() ?? "";
        for (const line of lines.filter(Boolean)) {
          const data = JSON.parse(line);
          progressReporter?.(data.status, data.completed, data.total);
        }
      }
    } finally {
      const release = await Ollama.modelsBeingInstalledMutex.acquire();
      try {
        Ollama.modelsBeingInstalled.delete(modelName);
      } finally {
        release();
      }
    }
  }

  public async isInstallingModel(modelName: string): Promise<boolean> {
    const release = await Ollama.modelsBeingInstalledMutex.acquire();
    try {
      return Ollama.modelsBeingInstalled.has(modelName);
    } finally {
      release();
    }
  }
}

export default Ollama;