add clip-bpe-ts package as util

This commit is contained in:
Abhinav 2024-01-05 00:03:59 +05:30
parent 876f52cd70
commit c7ae4cc920
6 changed files with 20 additions and 8 deletions

View file

@ -33,7 +33,8 @@ const TEXT_EMBEDDING_EXTRACT_CMD: string[] = [
INPUT_PATH_PLACEHOLDER,
];
const ort = require('onnxruntime-node');
const { encode } = require('gpt-3-encoder');
import Tokenizer from '../utils/clip-bpe-ts/mod';
const { createCanvas, Image } = require('canvas');
const TEXT_MODEL_DOWNLOAD_URL = {
@ -198,6 +199,14 @@ async function getOnnxTextSession() {
return onnxTextSession;
}
/**
 * Module-level cache for the tokenizer instance.
 * Holds `null` until getTokenizer() constructs the instance, so the
 * declared type includes `null` explicitly instead of assigning `null`
 * to a non-nullable `Tokenizer`.
 */
let tokenizer: Tokenizer | null = null;

/**
 * Return the cached tokenizer, constructing it on the first call.
 *
 * @returns The cached `Tokenizer` instance (never `null`).
 */
function getTokenizer(): Tokenizer {
    if (!tokenizer) {
        // First use: build the instance once and keep it for later calls.
        tokenizer = new Tokenizer();
    }
    return tokenizer;
}
export async function computeImageEmbedding(
inputFilePath: string
): Promise<Float32Array> {
@ -274,7 +283,7 @@ export async function computeTextEmbedding(
ggmlTextEmbedding,
onnxTextEmbedding
);
console.log('textEmbeddingScore', score);
log.info('textEmbeddingScore', score);
return onnxTextEmbedding;
}
@ -323,13 +332,14 @@ export async function computeONNXTextEmbedding(
): Promise<Float32Array> {
try {
const imageSession = await getOnnxTextSession();
const tokenizedText = Int32Array.from(encode(text));
const tokenizer = getTokenizer();
const tokenizedText = Int32Array.from(tokenizer.encodeForCLIP(text));
const feeds = {
input: new ort.Tensor('int32', tokenizedText, [1, 77]),
};
const results = await imageSession.run(feeds);
console.log('result', results);
return new Float32Array();
const embedVec = results['output'].data; // Float32Array
return embedVec;
} catch (err) {
if (err.message === CustomErrors.MODEL_DOWNLOAD_PENDING) {
log.info(CustomErrors.MODEL_DOWNLOAD_PENDING);

File diff suppressed because one or more lines are too long

View file

@ -1,5 +1,5 @@
import * as htmlEntities from 'html-entities';
import bpeVocabData from './bpe_simple_vocab_16e6.mjs';
import bpeVocabData from './bpe_simple_vocab_16e6';
// import ftfy from "https://deno.land/x/ftfy_pyodide@v0.1.1/mod.js";
function ord(c: string) {

File diff suppressed because one or more lines are too long