Răsfoiți Sursa

Add article metadata headers to the HTML generated for vector store files

Yuki Takei 11 luni în urmă
părinte
comite
e83510f9dd

+ 7 - 5
apps/app/src/features/openai/server/services/openai.ts

@@ -295,9 +295,11 @@ class OpenaiService implements IOpenaiService {
     }
   }
 
-  private async uploadFile(pageId: Types.ObjectId, pagePath: string, revisionBody: string): Promise<OpenAI.Files.FileObject> {
-    const convertedHtml = await convertMarkdownToHtml({ pagePath, revisionBody });
-    const file = await toFile(Readable.from(convertedHtml), `${pageId}.html`);
+  private async uploadFile(revisionBody: string, page: HydratedDocument<PageDocument>): Promise<OpenAI.Files.FileObject> {
+    const siteUrl = configManager.getConfig('app:siteUrl');
+
+    const convertedHtml = await convertMarkdownToHtml(revisionBody, { page, siteUrl });
+    const file = await toFile(Readable.from(convertedHtml), `${page._id}.html`);
     const uploadedFile = await this.client.uploadFile(file);
     return uploadedFile;
   }
@@ -325,14 +327,14 @@ class OpenaiService implements IOpenaiService {
     const processUploadFile = async(page: HydratedDocument<PageDocument>) => {
       if (page._id != null && page.revision != null) {
         if (isPopulated(page.revision) && page.revision.body.length > 0) {
-          const uploadedFile = await this.uploadFile(page._id, page.path, page.revision.body);
+          const uploadedFile = await this.uploadFile(page.revision.body, page);
           prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
           return;
         }
 
         const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
         if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
-          const uploadedFile = await this.uploadFile(page._id, page.path, pagePopulatedToShowRevision.revision.body);
+          const uploadedFile = await this.uploadFile(pagePopulatedToShowRevision.revision.body, page);
           prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
         }
       }

+ 18 - 2
apps/app/src/features/openai/server/utils/convert-markdown-to-html.ts

@@ -1,4 +1,6 @@
 import { dynamicImport } from '@cspell/dynamic-import';
+import type { IPage } from '@growi/core/dist/interfaces';
+import { DevidedPagePath } from '@growi/core/dist/models';
 import type { Root, Code } from 'mdast';
 import type * as RehypeMeta from 'rehype-meta';
 import type * as RehypeStringify from 'rehype-stringify';
@@ -55,7 +57,12 @@ const initializeModules = async(): Promise<void> => {
   };
 };
 
-export const convertMarkdownToHtml = async({ pagePath, revisionBody }: { pagePath: string, revisionBody: string }): Promise<string> => {
+type ConvertMarkdownToHtmlArgs = {
+  page: IPage,
+  siteUrl: string | undefined,
+}
+
+export const convertMarkdownToHtml = async(revisionBody: string, args: ConvertMarkdownToHtmlArgs): Promise<string> => {
   await initializeModules();
 
   const {
@@ -76,12 +83,21 @@ export const convertMarkdownToHtml = async({ pagePath, revisionBody }: { pagePat
     };
   };
 
+  const { page, siteUrl } = args;
+  const { latter: title } = new DevidedPagePath(page.path);
+
   const processor = unified()
     .use(remarkParse)
     .use(sanitizeMarkdown)
     .use(remarkRehype)
     .use(rehypeMeta, {
-      title: pagePath,
+      og: true,
+      type: 'article',
+      title,
+      pathname: page.path,
+      published: page.createdAt,
+      modified: page.updatedAt,
+      origin: siteUrl,
     })
     .use(rehypeStringify);