nHigashiWeseek 1 год назад
Родитель
Commit
2e8ee0c986

+ 4 - 1
apps/app/src/server/service/page-spritter.ts

@@ -27,7 +27,10 @@ export async function splitMarkdownByTokens(
 ): Promise<string[]> {
 ): Promise<string[]> {
   // Obtain encoding based on the model
   // Obtain encoding based on the model
   const encoding: Tiktoken = encoding_for_model(model);
   const encoding: Tiktoken = encoding_for_model(model);
-  const remarkParse = (await import('remark-parse')).default;
+
+  // Dynamically import remark-parse
+  const { default: remarkParse } = await import('remark-parse');
+
   // Parse Markdown into AST
   // Parse Markdown into AST
   const processor = unified().use(remarkParse);
   const processor = unified().use(remarkParse);
   const tree = processor.parse(markdownContent) as Root;
   const tree = processor.parse(markdownContent) as Root;

+ 12 - 10
apps/app/test/integration/service/page-sprit.test.ts

@@ -1,3 +1,5 @@
+// test/integration/service/page-spritter.test.ts
+
 import type { TiktokenModel } from '@dqbd/tiktoken';
 import type { TiktokenModel } from '@dqbd/tiktoken';
 
 
 import { splitMarkdownByTokens } from '../../../src/server/service/page-spritter';
 import { splitMarkdownByTokens } from '../../../src/server/service/page-spritter';
@@ -5,14 +7,14 @@ import { splitMarkdownByTokens } from '../../../src/server/service/page-spritter
 describe('splitMarkdownByTokens', () => {
 describe('splitMarkdownByTokens', () => {
   const model: TiktokenModel = 'gpt-3.5-turbo';
   const model: TiktokenModel = 'gpt-3.5-turbo';
 
 
-  test('Returns without splitting when token count is below the maximum', () => {
+  test('Returns without splitting when token count is below the maximum', async() => {
     const markdownContent = '# Heading\n\nThis is a test.';
     const markdownContent = '# Heading\n\nThis is a test.';
-    const result = splitMarkdownByTokens(model, markdownContent, 1000);
+    const result = await splitMarkdownByTokens(model, markdownContent, 1000);
     expect(result).toHaveLength(1);
     expect(result).toHaveLength(1);
     expect(result[0]).toBe(markdownContent);
     expect(result[0]).toBe(markdownContent);
   });
   });
 
 
-  test('Splits by sections when token count exceeds the maximum', () => {
+  test('Splits by sections when token count exceeds the maximum', async() => {
     const markdownContent = `
     const markdownContent = `
 # Heading1
 # Heading1
 
 
@@ -26,14 +28,14 @@ This is the content of section 2.
 
 
 This is the content of section 3.
 This is the content of section 3.
     `;
     `;
-    const result = splitMarkdownByTokens(model, markdownContent, 10); // Set a small maxTokens
+    const result = await splitMarkdownByTokens(model, markdownContent, 10); // Set a small maxTokens
     expect(result).toHaveLength(3);
     expect(result).toHaveLength(3);
     expect(result[0]).toContain('Heading1');
     expect(result[0]).toContain('Heading1');
     expect(result[1]).toContain('Heading2');
     expect(result[1]).toContain('Heading2');
     expect(result[2]).toContain('Heading3');
     expect(result[2]).toContain('Heading3');
   });
   });
 
 
-  test('Recursively splits into subsections', () => {
+  test('Recursively splits into subsections', async() => {
     const markdownContent = `
     const markdownContent = `
 # Heading1
 # Heading1
 
 
@@ -49,29 +51,29 @@ Content1-2
 
 
 Content2
 Content2
     `;
     `;
-    const result = splitMarkdownByTokens(model, markdownContent, 20);
+    const result = await splitMarkdownByTokens(model, markdownContent, 20);
     expect(result.length).toBeGreaterThan(2);
     expect(result.length).toBeGreaterThan(2);
     expect(result.some(chunk => chunk.includes('Subheading1-1'))).toBe(true);
     expect(result.some(chunk => chunk.includes('Subheading1-1'))).toBe(true);
     expect(result.some(chunk => chunk.includes('Subheading1-2'))).toBe(true);
     expect(result.some(chunk => chunk.includes('Subheading1-2'))).toBe(true);
   });
   });
 
 
-  test('Splits by paragraphs', () => {
+  test('Splits by paragraphs', async() => {
     const markdownContent = `
     const markdownContent = `
 # Heading
 # Heading
 
 
 ${'Long paragraph. '.repeat(50)}
 ${'Long paragraph. '.repeat(50)}
     `;
     `;
-    const result = splitMarkdownByTokens(model, markdownContent, 50);
+    const result = await splitMarkdownByTokens(model, markdownContent, 50);
     expect(result.length).toBeGreaterThan(1);
     expect(result.length).toBeGreaterThan(1);
   });
   });
 
 
-  test('Adds a single node as is when it exceeds maxTokens', () => {
+  test('Adds a single node as is when it exceeds maxTokens', async() => {
     const markdownContent = `
     const markdownContent = `
 # Heading
 # Heading
 
 
 ${'Very long paragraph. '.repeat(200)}
 ${'Very long paragraph. '.repeat(200)}
     `;
     `;
-    const result = splitMarkdownByTokens(model, markdownContent, 50);
+    const result = await splitMarkdownByTokens(model, markdownContent, 50);
     expect(result).toHaveLength(1);
     expect(result).toHaveLength(1);
     expect(result[0]).toContain('Very long paragraph.');
     expect(result[0]).toContain('Very long paragraph.');
   });
   });