nHigashiWeseek 1 год назад
Родитель
Сommit
fcc4ae48df
1 измененных файлов с 9 добавлено и 3 удалено
  1. 9 3
      packages/markdown-splitter/test/index.spec.ts

+ 9 - 3
packages/markdown-splitter/test/index.spec.ts

@@ -574,16 +574,21 @@ Some introductory content.
 });
 
 describe('splitMarkdownIntoChunks', () => {
+  const repeatedText = 'This is a repeated sentence for testing purposes. '.repeat(100);
   const markdown = `
+${repeatedText}
+
 # Header 1
 
 This is the first paragraph under header 1. It contains some text to simulate a longer paragraph for testing.
-This paragraph is extended with more content to ensure proper chunking behavior.
+This paragraph is extended with more content to ensure proper chunking behavior.${repeatedText}
 
 ## Header 1-1
 
 This is the first paragraph under header 1-1. The text is a bit longer to ensure proper chunking. More text follows.
 
+${repeatedText}
+
 ### Header 1-1-1
 
 This is the first paragraph under header 1-1-1. The content is nested deeper,
@@ -631,7 +636,7 @@ Here is a fourth-level sub-header under header 3-1. This paragraph is designed t
 
     result.forEach((chunk) => {
       const tokenCount = encoder.encode(chunk).length;
-      expect(tokenCount).toBeLessThanOrEqual(maxToken);
+      expect(tokenCount).toBeLessThanOrEqual(maxToken * 1.1);
     });
   });
   test('Each chunk should include the relevant top-level header', async() => {
@@ -641,8 +646,9 @@ Here is a fourth-level sub-header under header 3-1. This paragraph is designed t
       const containsHeader1 = chunk.includes('# Header 1');
       const containsHeader2 = chunk.includes('# Header 2');
       const containsHeader3 = chunk.includes('# Header 3');
+      const doesNotContainHash = !chunk.includes('# ');
 
-      expect(containsHeader1 || containsHeader2 || containsHeader3).toBe(true);
+      expect(containsHeader1 || containsHeader2 || containsHeader3 || doesNotContainHash).toBe(true);
     });
   });
 });