|
|
@@ -20,7 +20,8 @@ import type { PageModel } from '../../models/page';
|
|
|
import { createBatchStream } from '../../util/batch-stream';
|
|
|
import { configManager } from '../config-manager';
|
|
|
import type { UpdateOrInsertPagesOpts } from '../interfaces/search';
|
|
|
-
|
|
|
+// // import { embed, openaiClient, fileUpload } from '../openai';
|
|
|
+// import { getOrCreateSearchAssistant } from '../openai/assistant';
|
|
|
|
|
|
import { aggregatePipelineToIndex } from './aggregate-to-index';
|
|
|
import type { AggregatedPage, BulkWriteBody, BulkWriteCommand } from './bulk-write';
|
|
|
@@ -380,6 +381,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
|
|
|
const document: BulkWriteBody = {
|
|
|
path: page.path,
|
|
|
body: page.revision.body,
|
|
|
+ body_embedded: page.revisionBodyEmbedded,
|
|
|
username: page.creator?.username,
|
|
|
comments: page.commentsCount > 0 ? page.comments : undefined,
|
|
|
comment_count: page.commentsCount,
|
|
|
@@ -479,6 +481,28 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
|
|
|
},
|
|
|
});
|
|
|
|
|
|
+ // const appendEmbeddingStream = new Transform({
|
|
|
+ // objectMode: true,
|
|
|
+ // async transform(chunk: AggregatedPage[], encoding, callback) {
|
|
|
+ // // append embedding
|
|
|
+ // for await (const doc of chunk) {
|
|
|
+ // doc.revisionBodyEmbedded = (await embed(doc.revision.body, doc.creator?.username))[0].embedding;
|
|
|
+ // }
|
|
|
+
|
|
|
+ // this.push(chunk);
|
|
|
+ // callback();
|
|
|
+ // },
|
|
|
+ // });
|
|
|
+
|
|
|
+ // const appendFileUploadedStream = new Transform({
|
|
|
+ // objectMode: true,
|
|
|
+ // async transform(chunk, encoding, callback) {
|
|
|
+ // await fileUpload(chunk);
|
|
|
+ // this.push(chunk);
|
|
|
+ // callback();
|
|
|
+ // },
|
|
|
+ // });
|
|
|
+
|
|
|
let count = 0;
|
|
|
const writeStream = new Writable({
|
|
|
objectMode: true,
|
|
|
@@ -532,6 +556,8 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
|
|
|
readStream
|
|
|
.pipe(batchStream)
|
|
|
.pipe(appendTagNamesStream)
|
|
|
+ // .pipe(appendEmbeddingStream)
|
|
|
+ // .pipe(appendFileUploadedStream)
|
|
|
.pipe(writeStream);
|
|
|
|
|
|
return streamToPromise(writeStream);
|
|
|
@@ -832,6 +858,42 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ // async appendVectorScore(query, queryString: string, username?: string): Promise<void> {
|
|
|
+
|
|
|
+ // const searchAssistant = await getOrCreateSearchAssistant();
|
|
|
+
|
|
|
+ // // generate keywords for vector
|
|
|
+ // const run = await openaiClient.beta.threads.createAndRunPoll({
|
|
|
+ // assistant_id: searchAssistant.id,
|
|
|
+ // thread: {
|
|
|
+ // messages: [
|
|
|
+ // { role: 'user', content: 'globalLang: "en_US", userLang: "ja_JP", user_input: "武井さんがジョインしたのはいつですか?"' },
|
|
|
+ // { role: 'assistant', content: '武井さん 武井 takei yuki ジョイン join 入社 加入 雇用開始 年月日 start date join employee' },
|
|
|
+ // { role: 'user', content: `globalLang: "en_US", userLang: "ja_JP", user_input: "${queryString}"` },
|
|
|
+ // ],
|
|
|
+ // },
|
|
|
+ // });
|
|
|
+ // const messages = await openaiClient.beta.threads.messages.list(run.thread_id, {
|
|
|
+ // limit: 1,
|
|
|
+ // });
|
|
|
+ // const content = messages.data[0].content[0];
|
|
|
+ // const keywordsForVector = content.type === 'text' ? content.text.value : queryString;
|
|
|
+
|
|
|
+ // logger.debug('keywordsFor: ', keywordsForVector);
|
|
|
+
|
|
|
+ // const queryVector = (await embed(queryString, username))[0].embedding;
|
|
|
+
|
|
|
+ // query.body.query = {
|
|
|
+ // script_score: {
|
|
|
+ // query: { ...query.body.query },
|
|
|
+ // script: {
|
|
|
+ // source: "cosineSimilarity(params.query_vector, 'body_embedded') + 1.0",
|
|
|
+ // params: { query_vector: queryVector },
|
|
|
+ // },
|
|
|
+ // },
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+
|
|
|
appendHighlight(query) {
|
|
|
query.body.highlight = {
|
|
|
fragmenter: 'simple',
|
|
|
@@ -862,15 +924,22 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
|
|
|
const size = option?.limit ?? null;
|
|
|
const sort = option?.sort ?? null;
|
|
|
const order = option?.order ?? null;
|
|
|
+
|
|
|
const query = this.createSearchQuery();
|
|
|
- this.appendCriteriaForQueryString(query, terms);
|
|
|
|
|
|
- await this.filterPagesByViewer(query, user, userGroups);
|
|
|
+ if (option?.vector) {
|
|
|
+ // await this.filterPagesByViewer(query, user, userGroups);
|
|
|
+ // await this.appendVectorScore(query, queryString, user?.username);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ this.appendCriteriaForQueryString(query, terms);
|
|
|
+ await this.filterPagesByViewer(query, user, userGroups);
|
|
|
+ await this.appendFunctionScore(query, queryString);
|
|
|
+ }
|
|
|
|
|
|
this.appendResultSize(query, from, size);
|
|
|
|
|
|
this.appendSortOrder(query, sort, order);
|
|
|
- await this.appendFunctionScore(query, queryString);
|
|
|
|
|
|
this.appendHighlight(query);
|
|
|
|