Add clip-bpe-ts package as a util
This commit is contained in:
parent
876f52cd70
commit
c7ae4cc920
6 changed files with 20 additions and 8 deletions
|
@ -33,7 +33,8 @@ const TEXT_EMBEDDING_EXTRACT_CMD: string[] = [
|
|||
INPUT_PATH_PLACEHOLDER,
|
||||
];
|
||||
const ort = require('onnxruntime-node');
|
||||
const { encode } = require('gpt-3-encoder');
|
||||
import Tokenizer from '../utils/clip-bpe-ts/mod';
|
||||
|
||||
const { createCanvas, Image } = require('canvas');
|
||||
|
||||
const TEXT_MODEL_DOWNLOAD_URL = {
|
||||
|
@ -198,6 +199,14 @@ async function getOnnxTextSession() {
|
|||
return onnxTextSession;
|
||||
}
|
||||
|
||||
/**
 * Module-wide tokenizer instance, created lazily.
 * Remains `null` until the first call to `getTokenizer()`.
 */
let tokenizer: Tokenizer | null = null;

/**
 * Returns the shared tokenizer, constructing it on first use so that the
 * constructor runs at most once per process.
 *
 * @returns The same `Tokenizer` instance on every call.
 */
function getTokenizer(): Tokenizer {
    if (tokenizer === null) {
        tokenizer = new Tokenizer();
    }
    return tokenizer;
}
|
||||
|
||||
export async function computeImageEmbedding(
|
||||
inputFilePath: string
|
||||
): Promise<Float32Array> {
|
||||
|
@ -274,7 +283,7 @@ export async function computeTextEmbedding(
|
|||
ggmlTextEmbedding,
|
||||
onnxTextEmbedding
|
||||
);
|
||||
console.log('textEmbeddingScore', score);
|
||||
log.info('textEmbeddingScore', score);
|
||||
return onnxTextEmbedding;
|
||||
}
|
||||
|
||||
|
@ -323,13 +332,14 @@ export async function computeONNXTextEmbedding(
|
|||
): Promise<Float32Array> {
|
||||
try {
|
||||
const imageSession = await getOnnxTextSession();
|
||||
const tokenizedText = Int32Array.from(encode(text));
|
||||
const tokenizer = getTokenizer();
|
||||
const tokenizedText = Int32Array.from(tokenizer.encodeForCLIP(text));
|
||||
const feeds = {
|
||||
input: new ort.Tensor('int32', tokenizedText, [1, 77]),
|
||||
};
|
||||
const results = await imageSession.run(feeds);
|
||||
console.log('result', results);
|
||||
return new Float32Array();
|
||||
const embedVec = results['output'].data; // Float32Array
|
||||
return embedVec;
|
||||
} catch (err) {
|
||||
if (err.message === CustomErrors.MODEL_DOWNLOAD_PENDING) {
|
||||
log.info(CustomErrors.MODEL_DOWNLOAD_PENDING);
|
||||
|
|
4
src/utils/clip-bpe-ts/bpe_simple_vocab_16e6.ts
Normal file
4
src/utils/clip-bpe-ts/bpe_simple_vocab_16e6.ts
Normal file
File diff suppressed because one or more lines are too long
|
@ -1,5 +1,5 @@
|
|||
import * as htmlEntities from 'html-entities';
|
||||
import bpeVocabData from './bpe_simple_vocab_16e6.mjs';
|
||||
import bpeVocabData from './bpe_simple_vocab_16e6';
|
||||
// import ftfy from "https://deno.land/x/ftfy_pyodide@v0.1.1/mod.js";
|
||||
|
||||
function ord(c: string) {
|
File diff suppressed because one or more lines are too long
Loading…
Add table
Reference in a new issue