Просмотр исходного кода

Merge pull request #9251 from weseek/feat/153983-155025-split-markdown-into-specified-tokens

feat: Split markdown into specified tokens
Yuki Takei 1 год назад
Родитель
Commit
288df64440

+ 1 - 0
packages/markdown-splitter/package.json

@@ -40,6 +40,7 @@
   },
   "dependencies": {
     "@types/js-yaml": "^4.0.9",
+    "js-tiktoken": "^1.0.15",
     "remark-frontmatter": "^5.0.0",
     "remark-gfm": "^4.0.0",
     "remark-parse": "^11.0.0",

+ 1 - 0
packages/markdown-splitter/src/index.ts

@@ -1 +1,2 @@
 export * from './services/markdown-splitter';
+export * from './services/markdown-token-splitter';

+ 45 - 49
packages/markdown-splitter/src/services/markdown-splitter.ts

@@ -1,3 +1,5 @@
+import type { TiktokenModel } from 'js-tiktoken';
+import { encodingForModel } from 'js-tiktoken';
 import yaml from 'js-yaml';
 import remarkFrontmatter from 'remark-frontmatter'; // Frontmatter processing
 import remarkGfm from 'remark-gfm'; // GFM processing
@@ -6,26 +8,13 @@ import type { Options as StringifyOptions } from 'remark-stringify';
 import remarkStringify from 'remark-stringify';
 import { unified } from 'unified';
 
-export type Chunk = {
+export type MarkdownFragment = {
   label: string;
+  type: string;
   text: string;
+  tokenCount: number;
 };
 
-/**
- * Processes and adds a new chunk to the chunks array if content is not empty.
- * Clears the contentBuffer array after processing.
- * @param chunks - The array to store processed chunks.
- * @param contentBuffer - The array of content lines to be processed.
- * @param label - The label for the content chunk.
- */
-function addContentChunk(chunks: Chunk[], contentBuffer: string[], label: string) {
-  const text = contentBuffer.join('\n\n').trimEnd();
-  if (text !== '') {
-    chunks.push({ label, text });
-  }
-  contentBuffer.length = 0; // Clear the contentBuffer array
-}
-
 /**
  * Updates the section numbers based on the heading depth and returns the updated section label.
  * Handles non-consecutive heading levels by initializing missing levels with 1.
@@ -53,22 +42,23 @@ function updateSectionNumbers(sectionNumbers: number[], headingDepth: number): s
 }
 
 /**
- * Splits Markdown text into labeled chunks using remark-parse and remark-stringify,
- * considering content that may start before any headers and handling non-consecutive heading levels.
+ * Splits Markdown text into labeled markdownFragments using remark-parse and remark-stringify,
+ * processing each content node separately and labeling them as 1-content-1, 1-content-2, etc.
  * @param markdownText - The input Markdown string.
- * @returns An array of labeled chunks.
+ * @returns An array of labeled markdownFragments.
  */
-export async function splitMarkdownIntoChunks(markdownText: string): Promise<Chunk[]> {
-  const chunks: Chunk[] = [];
+export async function splitMarkdownIntoFragments(markdownText: string, model: TiktokenModel): Promise<MarkdownFragment[]> {
+  const markdownFragments: MarkdownFragment[] = [];
   const sectionNumbers: number[] = [];
-  let frontmatter: Record<string, unknown> | null = null; // Variable to store frontmatter
-  const contentBuffer: string[] = [];
   let currentSectionLabel = '';
+  const contentCounters: Record<string, number> = {};
 
   if (typeof markdownText !== 'string' || markdownText.trim() === '') {
-    return chunks;
+    return markdownFragments;
   }
 
+  const encoder = encodingForModel(model);
+
   const parser = unified()
     .use(remarkParse)
     .use(remarkFrontmatter, ['yaml'])
@@ -89,42 +79,48 @@ export async function splitMarkdownIntoChunks(markdownText: string): Promise<Chu
   // Iterate over top-level nodes to prevent duplication
   for (const node of parsedTree.children) {
     if (node.type === 'yaml') {
-      frontmatter = yaml.load(node.value) as Record<string, unknown>;
+      // Frontmatter block found, handle only the first instance
+      const frontmatter = yaml.load(node.value) as Record<string, unknown>;
+      const frontmatterText = JSON.stringify(frontmatter, null, 2);
+      const tokenCount = encoder.encode(frontmatterText).length;
+      markdownFragments.push({
+        label: 'frontmatter',
+        type: 'yaml',
+        text: frontmatterText,
+        tokenCount,
+      });
     }
     else if (node.type === 'heading') {
-      // Process pending content before heading
-      if (contentBuffer.length > 0) {
-        const contentLabel = currentSectionLabel !== '' ? `${currentSectionLabel}-content` : '0-content';
-        addContentChunk(chunks, contentBuffer, contentLabel);
-      }
-
       const headingDepth = node.depth;
       currentSectionLabel = updateSectionNumbers(sectionNumbers, headingDepth);
 
-      const headingMarkdown = stringifier.stringify(node as any);// eslint-disable-line @typescript-eslint/no-explicit-any
-      chunks.push({ label: `${currentSectionLabel}-heading`, text: headingMarkdown.trim() });
+      const headingMarkdown = stringifier.stringify(node as any).trim(); // eslint-disable-line @typescript-eslint/no-explicit-any
+      const tokenCount = encoder.encode(headingMarkdown).length;
+      markdownFragments.push({
+        label: `${currentSectionLabel}-heading`, type: node.type, text: headingMarkdown, tokenCount,
+      });
     }
     else {
-      // Add non-heading content to the buffer
+      // Process non-heading content individually
       const contentMarkdown = stringifier.stringify(node as any).trim(); // eslint-disable-line @typescript-eslint/no-explicit-any
       if (contentMarkdown !== '') {
-        contentBuffer.push(contentMarkdown);
+        const contentCountKey = currentSectionLabel || '0';
+        if (!contentCounters[contentCountKey]) {
+          contentCounters[contentCountKey] = 1;
+        }
+        else {
+          contentCounters[contentCountKey]++;
+        }
+        const contentLabel = currentSectionLabel !== ''
+          ? `${currentSectionLabel}-content-${contentCounters[contentCountKey]}`
+          : `0-content-${contentCounters[contentCountKey]}`;
+        const tokenCount = encoder.encode(contentMarkdown).length;
+        markdownFragments.push({
+          label: contentLabel, type: node.type, text: contentMarkdown, tokenCount,
+        });
       }
     }
   }
 
-  // Process any remaining content
-  if (contentBuffer.length > 0) {
-    const contentLabel = currentSectionLabel !== '' ? `${currentSectionLabel}-content` : '0-content';
-    addContentChunk(chunks, contentBuffer, contentLabel);
-  }
-
-  if (frontmatter) {
-    chunks.unshift({
-      label: 'frontmatter',
-      text: JSON.stringify(frontmatter, null, 2),
-    });
-  }
-
-  return chunks;
+  return markdownFragments;
 }

+ 188 - 0
packages/markdown-splitter/src/services/markdown-token-splitter.ts

@@ -0,0 +1,188 @@
+import { encodingForModel, type TiktokenModel } from 'js-tiktoken';
+
+import { splitMarkdownIntoFragments, type MarkdownFragment } from './markdown-splitter';
+
+type MarkdownFragmentGroups = MarkdownFragment[][] ;
+
+function groupMarkdownFragments(
+    markdownFragments: MarkdownFragment[],
+    maxToken: number,
+): MarkdownFragmentGroups {
+
+  const prefixes = markdownFragments.map(({ label }) => {
+    if (label === 'frontmatter') return 'frontmatter';
+    const match = label.match(/^\d+(?:-\d+)*/)!; // eslint-disable-line @typescript-eslint/no-non-null-assertion
+    return match[0];
+  });
+
+  const uniquePrefixes = [...new Set(prefixes.filter(Boolean))];
+
+  // Group chunks by prefix
+  const fragmentGroupes: MarkdownFragmentGroups = [];
+  let remainingPrefixes = [...uniquePrefixes];
+
+  // Process chunks so that the total token count per level doesn't exceed maxToken
+  while (remainingPrefixes.length > 0) {
+    const prefix = remainingPrefixes[0]; // Get the first prefix
+    const hasNextLevelPrefix = uniquePrefixes.some(p => p !== prefix && p.startsWith(prefix));
+
+    if (!hasNextLevelPrefix) {
+      // If there is no prefix that starts with the current prefix, group the chunks directly
+      let matchingFragments = markdownFragments.filter(fragment => fragment.label.startsWith(prefix));
+
+      // Add parent heading if it exists
+      const parts = prefix.split('-');
+      for (let i = 1; i < parts.length; i++) {
+        const parentPrefix = parts.slice(0, i).join('-');
+        const parentHeading = markdownFragments.find(fragment => fragment.label === `${parentPrefix}-heading`);
+        if (parentHeading) {
+          matchingFragments = [parentHeading, ...matchingFragments]; // Add the heading at the front
+        }
+      }
+
+      fragmentGroupes.push(matchingFragments);
+    }
+    else {
+      // Filter chunks that start with the current prefix
+      let matchingFragments = markdownFragments.filter(fragment => fragment.label.startsWith(prefix));
+
+      // Add parent heading if it exists
+      const parts = prefix.split('-');
+      for (let i = 1; i < parts.length; i++) {
+        const parentPrefix = parts.slice(0, i).join('-');
+        const parentHeading = markdownFragments.find(fragment => fragment.label === `${parentPrefix}-heading`);
+        if (parentHeading) {
+          matchingFragments = [parentHeading, ...matchingFragments];
+        }
+      }
+
+      // Calculate total token count including parent headings
+      const totalTokenCount = matchingFragments.reduce((sum, fragment) => sum + fragment.tokenCount, 0);
+
+      // If the total token count doesn't exceed maxToken, group the chunks
+      if (totalTokenCount <= maxToken) {
+        fragmentGroupes.push(matchingFragments);
+        remainingPrefixes = remainingPrefixes.filter(p => !p.startsWith(`${prefix}-`));
+      }
+      else {
+        // If it exceeds maxToken, strictly filter chunks by the exact numeric prefix
+        const strictMatchingFragments = markdownFragments.filter((fragment) => {
+          const match = fragment.label.match(/^\d+(-\d+)*(?=-)/);
+          return match && match[0] === prefix;
+        });
+
+        // Add parent heading if it exists
+        for (let i = 1; i < parts.length; i++) {
+          const parentPrefix = parts.slice(0, i).join('-');
+          const parentHeading = markdownFragments.find(fragment => fragment.label === `${parentPrefix}-heading`);
+          if (parentHeading) {
+            strictMatchingFragments.unshift(parentHeading); // Add the heading at the front
+          }
+        }
+
+        fragmentGroupes.push(strictMatchingFragments);
+      }
+    }
+    remainingPrefixes.shift();
+  }
+
+  return fragmentGroupes;
+}
+
+// Function to group markdown into chunks based on token count
+export async function splitMarkdownIntoChunks(
+    markdownText: string,
+    model: TiktokenModel,
+    maxToken = 800,
+): Promise<string[]> {
+  const encoder = encodingForModel(model);
+
+  // If the total token count for the entire markdown text is less than or equal to maxToken,
+  // return the entire markdown as a single chunk.
+  if (encoder.encode(markdownText).length <= maxToken) {
+    return [markdownText];
+  }
+
+  // Split markdown text into chunks
+  const markdownFragments = await splitMarkdownIntoFragments(markdownText, model);
+  const chunks = [] as string[];
+
+  // Group the chunks based on token count
+  const fragmentGroupes = groupMarkdownFragments(markdownFragments, maxToken);
+
+  fragmentGroupes.forEach((fragmentGroupe) => {
+    // Calculate the total token count for each group
+    const totalTokenCount = fragmentGroupe.reduce((sum, fragment) => sum + fragment.tokenCount, 0);
+
+    // If the total token count doesn't exceed maxToken, combine the chunks into one
+    if (totalTokenCount <= maxToken) {
+      const chunk = fragmentGroupe.map((fragment, index) => {
+        const nextFragment = fragmentGroupe[index + 1];
+        if (nextFragment) {
+          // If both the current and next chunks are headings, add a single newline
+          if (fragment.type === 'heading' && nextFragment.type === 'heading') {
+            return `${fragment.text}\n`;
+          }
+          // Add two newlines for other cases
+          return `${fragment.text}\n\n`;
+        }
+        return fragment.text; // No newlines for the last chunk
+      }).join('');
+
+      chunks.push(chunk);
+    }
+    else {
+      // If the total token count exceeds maxToken, split content
+      const headingFragments = fragmentGroupe.filter(fragment => fragment.type === 'heading'); // Find all headings
+      const headingText = headingFragments.map(heading => heading.text).join('\n'); // Combine headings with one newline
+
+      for (const fragment of fragmentGroupe) {
+        if (fragment.label.includes('content')) {
+          // Combine heading and paragraph content
+          const combinedTokenCount = headingFragments.reduce((sum, heading) => sum + heading.tokenCount, 0) + fragment.tokenCount;
+          // Check if headingChunks alone exceed maxToken
+          const headingTokenCount = headingFragments.reduce((sum, heading) => sum + heading.tokenCount, 0);
+
+          if (headingTokenCount > maxToken / 2) {
+            throw new Error(
+              `Heading token count is too large. Heading token count: ${headingTokenCount}, allowed maximum: ${Math.ceil(maxToken / 2)}`,
+            );
+          }
+
+          // If the combined token count exceeds maxToken, split the content by character count
+          if (combinedTokenCount > maxToken) {
+            const headingTokenCount = headingFragments.reduce((sum, heading) => sum + heading.tokenCount, 0);
+            const remainingTokenCount = maxToken - headingTokenCount;
+
+            // Calculate the total character count and token count
+            const fragmentCharCount = fragment.text.length;
+            const fragmenTokenCount = fragment.tokenCount;
+
+            // Calculate the character count for splitting
+            const charCountForSplit = Math.floor((remainingTokenCount / fragmenTokenCount) * fragmentCharCount);
+
+            // Split content based on character count
+            const splitContents = [];
+            for (let i = 0; i < fragment.text.length; i += charCountForSplit) {
+              splitContents.push(fragment.text.slice(i, i + charCountForSplit));
+            }
+
+            // Add each split content to the new group of chunks
+            splitContents.forEach((splitText) => {
+              const chunk = headingText
+                ? `${headingText}\n\n${splitText}`
+                : `${splitText}`;
+              chunks.push(chunk);
+            });
+          }
+          else {
+            const chunk = `${headingText}\n\n${fragment.text}`;
+            chunks.push(chunk);
+          }
+        }
+      }
+    }
+  });
+
+  return chunks;
+}

+ 485 - 77
packages/markdown-splitter/test/index.spec.ts

@@ -1,12 +1,17 @@
-import type { Chunk } from '../src/services/markdown-splitter';
-import { splitMarkdownIntoChunks } from '../src/services/markdown-splitter';
+import { encodingForModel, type TiktokenModel } from 'js-tiktoken';
 
-describe('splitMarkdownIntoChunks', () => {
+import type { MarkdownFragment } from '~/index';
+import { splitMarkdownIntoChunks, splitMarkdownIntoFragments } from '~/index';
+
+const MODEL: TiktokenModel = 'gpt-4';
+const encoder = encodingForModel(MODEL);
+
+describe('splitMarkdownIntoFragments', () => {
 
   test('handles empty markdown string', async() => {
     const markdown = '';
-    const expected: Chunk[] = [];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+    const expected: MarkdownFragment[] = [];
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -16,13 +21,23 @@ It spans multiple lines.
 
 Another paragraph.
     `;
-    const expected: Chunk[] = [
+
+    const expected: MarkdownFragment[] = [
+      {
+        label: '0-content-1',
+        type: 'paragraph',
+        text: 'This is some content without any headers.\nIt spans multiple lines.',
+        tokenCount: encoder.encode('This is some content without any headers.\nIt spans multiple lines.').length,
+      },
       {
-        label: '0-content',
-        text: 'This is some content without any headers.\nIt spans multiple lines.\n\nAnother paragraph.',
+        label: '0-content-2',
+        type: 'paragraph',
+        text: 'Another paragraph.',
+        tokenCount: encoder.encode('Another paragraph.').length,
       },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -37,15 +52,47 @@ Content under header 1.1.
 # Header 2
 Content under header 2.
     `;
-    const expected: Chunk[] = [
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Content under header 1.' },
-      { label: '1-1-heading', text: '## Header 1.1' },
-      { label: '1-1-content', text: 'Content under header 1.1.' },
-      { label: '2-heading', text: '# Header 2' },
-      { label: '2-content', text: 'Content under header 2.' },
+
+    const expected: MarkdownFragment[] = [
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.',
+        tokenCount: encoder.encode('Content under header 1.').length,
+      },
+      {
+        label: '1-1-heading',
+        type: 'heading',
+        text: '## Header 1.1',
+        tokenCount: encoder.encode('## Header 1.1').length,
+      },
+      {
+        label: '1-1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.1.',
+        tokenCount: encoder.encode('Content under header 1.1.').length,
+      },
+      {
+        label: '2-heading',
+        type: 'heading',
+        text: '# Header 2',
+        tokenCount: encoder.encode('# Header 2').length,
+      },
+      {
+        label: '2-content-1',
+        type: 'paragraph',
+        text: 'Content under header 2.',
+        tokenCount: encoder.encode('Content under header 2.').length,
+      },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -68,53 +115,77 @@ Content of chapter 2.
 ## Section 2.1
 Content of section 2.1.
     `;
-    const expected: Chunk[] = [
+
+    const expected: MarkdownFragment[] = [
       {
-        label: '0-content',
+        label: '0-content-1',
+        type: 'paragraph',
         text: 'Introduction without a header.',
+        tokenCount: encoder.encode('Introduction without a header.').length,
       },
       {
         label: '1-heading',
+        type: 'heading',
         text: '# Chapter 1',
+        tokenCount: encoder.encode('# Chapter 1').length,
       },
       {
-        label: '1-content',
+        label: '1-content-1',
+        type: 'paragraph',
         text: 'Content of chapter 1.',
+        tokenCount: encoder.encode('Content of chapter 1.').length,
       },
       {
         label: '1-1-1-heading',
+        type: 'heading',
         text: '### Section 1.1.1',
+        tokenCount: encoder.encode('### Section 1.1.1').length,
       },
       {
-        label: '1-1-1-content',
+        label: '1-1-1-content-1',
+        type: 'paragraph',
         text: 'Content of section 1.1.1.',
+        tokenCount: encoder.encode('Content of section 1.1.1.').length,
       },
       {
         label: '1-2-heading',
+        type: 'heading',
         text: '## Section 1.2',
+        tokenCount: encoder.encode('## Section 1.2').length,
       },
       {
-        label: '1-2-content',
+        label: '1-2-content-1',
+        type: 'paragraph',
         text: 'Content of section 1.2.',
+        tokenCount: encoder.encode('Content of section 1.2.').length,
       },
       {
         label: '2-heading',
+        type: 'heading',
         text: '# Chapter 2',
+        tokenCount: encoder.encode('# Chapter 2').length,
       },
       {
-        label: '2-content',
+        label: '2-content-1',
+        type: 'paragraph',
         text: 'Content of chapter 2.',
+        tokenCount: encoder.encode('Content of chapter 2.').length,
       },
       {
         label: '2-1-heading',
+        type: 'heading',
         text: '## Section 2.1',
+        tokenCount: encoder.encode('## Section 2.1').length,
       },
       {
-        label: '2-1-content',
+        label: '2-1-content-1',
+        type: 'paragraph',
         text: 'Content of section 2.1.',
+        tokenCount: encoder.encode('Content of section 2.1.').length,
       },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -132,17 +203,59 @@ Content under header 1.2.
 # Header 2
 Content under header 2.
     `;
-    const expected: Chunk[] = [
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Content under header 1.' },
-      { label: '1-1-1-1-heading', text: '#### Header 1.1.1.1' },
-      { label: '1-1-1-1-content', text: 'Content under header 1.1.1.1.' },
-      { label: '1-2-heading', text: '## Header 1.2' },
-      { label: '1-2-content', text: 'Content under header 1.2.' },
-      { label: '2-heading', text: '# Header 2' },
-      { label: '2-content', text: 'Content under header 2.' },
+
+    const expected: MarkdownFragment[] = [
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.',
+        tokenCount: encoder.encode('Content under header 1.').length,
+      },
+      {
+        label: '1-1-1-1-heading',
+        type: 'heading',
+        text: '#### Header 1.1.1.1',
+        tokenCount: encoder.encode('#### Header 1.1.1.1').length,
+      },
+      {
+        label: '1-1-1-1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.1.1.1.',
+        tokenCount: encoder.encode('Content under header 1.1.1.1.').length,
+      },
+      {
+        label: '1-2-heading',
+        type: 'heading',
+        text: '## Header 1.2',
+        tokenCount: encoder.encode('## Header 1.2').length,
+      },
+      {
+        label: '1-2-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.2.',
+        tokenCount: encoder.encode('Content under header 1.2.').length,
+      },
+      {
+        label: '2-heading',
+        type: 'heading',
+        text: '# Header 2',
+        tokenCount: encoder.encode('# Header 2').length,
+      },
+      {
+        label: '2-content-1',
+        type: 'paragraph',
+        text: 'Content under header 2.',
+        tokenCount: encoder.encode('Content under header 2.').length,
+      },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -154,13 +267,35 @@ Content under header 1.
 #### Header 1.1.1.1
 Content under header 1.1.1.1.
     `;
-    const expected: Chunk[] = [
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Content under header 1.' },
-      { label: '1-1-1-1-heading', text: '#### Header 1.1.1.1' },
-      { label: '1-1-1-1-content', text: 'Content under header 1.1.1.1.' },
+
+    const expected: MarkdownFragment[] = [
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.',
+        tokenCount: encoder.encode('Content under header 1.').length,
+      },
+      {
+        label: '1-1-1-1-heading',
+        type: 'heading',
+        text: '#### Header 1.1.1.1',
+        tokenCount: encoder.encode('#### Header 1.1.1.1').length,
+      },
+      {
+        label: '1-1-1-1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.1.1.1.',
+        tokenCount: encoder.encode('Content under header 1.1.1.1.').length,
+      },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -173,15 +308,35 @@ This is the second paragraph without a header.
 # Header 1
 Content under header 1.
     `;
-    const expected: Chunk[] = [
+
+    const expected: MarkdownFragment[] = [
       {
-        label: '0-content',
-        text: 'This is the first paragraph without a header.\n\nThis is the second paragraph without a header.',
+        label: '0-content-1',
+        type: 'paragraph',
+        text: 'This is the first paragraph without a header.',
+        tokenCount: encoder.encode('This is the first paragraph without a header.').length,
+      },
+      {
+        label: '0-content-2',
+        type: 'paragraph',
+        text: 'This is the second paragraph without a header.',
+        tokenCount: encoder.encode('This is the second paragraph without a header.').length,
+      },
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.',
+        tokenCount: encoder.encode('Content under header 1.').length,
       },
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Content under header 1.' },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -193,12 +348,29 @@ Content under header 1.
 
 ### Header 1.1.1
     `;
-    const expected: Chunk[] = [
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-1-heading', text: '## Header 1.1' },
-      { label: '1-1-1-heading', text: '### Header 1.1.1' },
+
+    const expected: MarkdownFragment[] = [
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-1-heading',
+        type: 'heading',
+        text: '## Header 1.1',
+        tokenCount: encoder.encode('## Header 1.1').length,
+      },
+      {
+        label: '1-1-1-heading',
+        type: 'heading',
+        text: '### Header 1.1.1',
+        tokenCount: encoder.encode('### Header 1.1.1').length,
+      },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -214,15 +386,47 @@ Another piece of content.
 # Header 2
 Content under header 2.
     `;
-    const expected: Chunk[] = [
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Content under header 1.' },
-      { label: '1-1-heading', text: '## Header 1.1' },
-      { label: '1-1-content', text: 'Content under header 1.1.\nAnother piece of content.' },
-      { label: '2-heading', text: '# Header 2' },
-      { label: '2-content', text: 'Content under header 2.' },
+
+    const expected: MarkdownFragment[] = [
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.',
+        tokenCount: encoder.encode('Content under header 1.').length,
+      },
+      {
+        label: '1-1-heading',
+        type: 'heading',
+        text: '## Header 1.1',
+        tokenCount: encoder.encode('## Header 1.1').length,
+      },
+      {
+        label: '1-1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.1.\nAnother piece of content.',
+        tokenCount: encoder.encode('Content under header 1.1.\nAnother piece of content.').length,
+      },
+      {
+        label: '2-heading',
+        type: 'heading',
+        text: '# Header 2',
+        tokenCount: encoder.encode('# Header 2').length,
+      },
+      {
+        label: '2-content-1',
+        type: 'paragraph',
+        text: 'Content under header 2.',
+        tokenCount: encoder.encode('Content under header 2.').length,
+      },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -239,15 +443,44 @@ Content under header 1.
 # Header 2
 Content under header 2.
     `;
-    const expected: Chunk[] = [
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Content under header 1.\n\n- Item 1\n  - Subitem 1\n- Item 2' },
-      { label: '2-heading', text: '# Header 2' },
-      { label: '2-content', text: 'Content under header 2.' },
+
+    const expected: MarkdownFragment[] = [
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Content under header 1.',
+        tokenCount: encoder.encode('Content under header 1.').length,
+      },
+      {
+        label: '1-content-2',
+        type: 'list',
+        text: '- Item 1\n  - Subitem 1\n- Item 2',
+        tokenCount: encoder.encode('- Item 1\n  - Subitem 1\n- Item 2').length,
+      },
+      {
+        label: '2-heading',
+        type: 'heading',
+        text: '# Header 2',
+        tokenCount: encoder.encode('# Header 2').length,
+      },
+      {
+        label: '2-content-1',
+        type: 'paragraph',
+        text: 'Content under header 2.',
+        tokenCount: encoder.encode('Content under header 2.').length,
+      },
     ];
-    const result = await splitMarkdownIntoChunks(markdown); // Await the result
+
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
+
   test('code blocks containing # are not treated as headings', async() => {
     const markdown = `
 # Header 1
@@ -261,16 +494,49 @@ Additional content.
 Content under header 2.
     `;
 
-    const expected: Chunk[] = [
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Some introductory content.\n\n```\n# This is a comment with a # symbol\nSome code line\n```\n\nAdditional content.' },
-      { label: '2-heading', text: '# Header 2' },
-      { label: '2-content', text: 'Content under header 2.' },
+    const expected: MarkdownFragment[] = [
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Some introductory content.',
+        tokenCount: encoder.encode('Some introductory content.').length,
+      },
+      {
+        label: '1-content-2',
+        type: 'code',
+        text: '```\n# This is a comment with a # symbol\nSome code line\n```',
+        tokenCount: encoder.encode('```\n# This is a comment with a # symbol\nSome code line\n```').length,
+      },
+      {
+        label: '1-content-3',
+        type: 'paragraph',
+        text: 'Additional content.',
+        tokenCount: encoder.encode('Additional content.').length,
+      },
+      {
+        label: '2-heading',
+        type: 'heading',
+        text: '# Header 2',
+        tokenCount: encoder.encode('# Header 2').length,
+      },
+      {
+        label: '2-content-1',
+        type: 'paragraph',
+        text: 'Content under header 2.',
+        tokenCount: encoder.encode('Content under header 2.').length,
+      },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
+
   test('frontmatter is processed and labeled correctly', async() => {
     const markdown = `---
 title: Test Document
@@ -281,13 +547,155 @@ author: John Doe
 Some introductory content.
     `;
 
-    const expected: Chunk[] = [
-      { label: 'frontmatter', text: JSON.stringify({ title: 'Test Document', author: 'John Doe' }, null, 2) },
-      { label: '1-heading', text: '# Header 1' },
-      { label: '1-content', text: 'Some introductory content.' },
+    const expected: MarkdownFragment[] = [
+      {
+        label: 'frontmatter',
+        type: 'yaml',
+        text: JSON.stringify({ title: 'Test Document', author: 'John Doe' }, null, 2),
+        tokenCount: encoder.encode(JSON.stringify({ title: 'Test Document', author: 'John Doe' }, null, 2)).length,
+      },
+      {
+        label: '1-heading',
+        type: 'heading',
+        text: '# Header 1',
+        tokenCount: encoder.encode('# Header 1').length,
+      },
+      {
+        label: '1-content-1',
+        type: 'paragraph',
+        text: 'Some introductory content.',
+        tokenCount: encoder.encode('Some introductory content.').length,
+      },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 });
+
+describe('splitMarkdownIntoChunks', () => {
+  const repeatedText = 'This is a repeated sentence for testing purposes. '.repeat(100);
+  const markdown = `---
+title: Test Document
+author: John Doe
+---
+
+${repeatedText}
+
+# Header 1
+
+This is the first paragraph under header 1. It contains some text to simulate a longer paragraph for testing.
+This paragraph is extended with more content to ensure proper chunking behavior.${repeatedText}
+
+## Header 1-1
+
+This is the first paragraph under header 1-1. The text is a bit longer to ensure proper chunking. More text follows.
+
+
+### Header 1-1-1
+
+This is the first paragraph under header 1-1-1. The content is nested deeper,
+making sure that the chunking algorithm works properly with multiple levels of headers.
+
+This is another paragraph under header 1-1-1, continuing the content at this deeper level.
+
+#### Header 1-1-1-1
+
+Now we have reached the fourth level of headers. The text here should also be properly chunked and grouped with its parent headers.
+
+This is another paragraph under header 1-1-1-1. It should be grouped with the correct higher-level headers.
+
+# Header 2
+
+Here is some content under header 2. This section should also be sufficiently long to ensure that the token count threshold is reached in the test.
+
+## Header 2-1
+
+${repeatedText}
+
+${repeatedText}
+
+Another sub-header under header 2 with text for testing chunking behavior. This is a fairly lengthy paragraph as well.
+
+We now have a fourth-level sub-header under header 2-1. This ensures that the chunking logic can handle deeply nested content.
+
+### Header 2-1-1
+
+Here is another paragraph under header 2-1-1. This paragraph is part of a more deeply nested section.
+
+# Header 3
+
+Continuing with more headers and content to make sure the markdown document is sufficiently large. This is a new header with more paragraphs under it.
+
+### Header 3-1
+
+This is a sub-header under header 3. The content here continues to grow, ensuring that the markdown is long enough to trigger multiple chunks.
+
+#### Header 3-1-1
+
+Here is a fourth-level sub-header under header 3-1. This paragraph is designed to create a larger markdown file for testing purposes.
+`;
+  test('Each chunk should not exceed the specified token count', async() => {
+    const maxToken = 800;
+    const result = await splitMarkdownIntoChunks(markdown, MODEL, maxToken);
+
+    result.forEach((chunk) => {
+      const tokenCount = encoder.encode(chunk).length;
+      expect(tokenCount).toBeLessThanOrEqual(maxToken * 1.1);
+    });
+  });
+  test('Each chunk should include the relevant top-level header', async() => {
+    const result = await splitMarkdownIntoChunks(markdown, MODEL, 800);
+
+    result.forEach((chunk) => {
+      const containsHeader1 = chunk.includes('# Header 1');
+      const containsHeader2 = chunk.includes('# Header 2');
+      const containsHeader3 = chunk.includes('# Header 3');
+      const doesNotContainHash = !chunk.includes('# ');
+
+      expect(containsHeader1 || containsHeader2 || containsHeader3 || doesNotContainHash).toBe(true);
+    });
+  });
+  test('Should throw an error if a header exceeds half of maxToken size with correct error message', async() => {
+    const maxToken = 800;
+    const markdownWithLongHeader = `
+# Short Header 1
+
+This is the first paragraph under short header 1. It contains some text for testing purposes.
+
+## ${repeatedText}
+
+This is the first paragraph under the long header. It contains text to ensure that the header length check is triggered if the header is too long.
+
+# Short Header 2
+
+Another section with a shorter header, but enough content to ensure proper chunking.
+`;
+
+    try {
+      await splitMarkdownIntoChunks(markdownWithLongHeader, MODEL, maxToken);
+    }
+    catch (error) {
+      if (error instanceof Error) {
+        expect(error.message).toContain('Heading token count is too large');
+      }
+      else {
+        throw new Error('An unknown error occurred');
+      }
+    }
+  });
+
+  test('Should return the entire markdown as a single chunk if token count is less than or equal to maxToken', async() => {
+    const markdownText = `
+    # Header 1
+    This is a short paragraph under header 1. It contains only a few sentences to ensure that the total token count remains under the maxToken limit.
+    `;
+
+    const maxToken = 800;
+
+    const result = await splitMarkdownIntoChunks(markdownText, MODEL, maxToken);
+
+    expect(result).toHaveLength(1);
+    expect(result[0]).toBe(markdownText);
+  });
+});

+ 8 - 0
yarn.lock

@@ -2259,6 +2259,7 @@
   version "1.0.0"
   dependencies:
     "@types/js-yaml" "^4.0.9"
+    js-tiktoken "^1.0.15"
     remark-frontmatter "^5.0.0"
     remark-gfm "^4.0.0"
     remark-parse "^11.0.0"
@@ -11731,6 +11732,13 @@ js-sha256@^0.9.0:
   resolved "https://registry.yarnpkg.com/js-sha256/-/js-sha256-0.9.0.tgz#0b89ac166583e91ef9123644bd3c5334ce9d0966"
   integrity sha512-sga3MHh9sgQN2+pJ9VYZ+1LPwXOxuBJBA5nrR5/ofPfuiJBE2hnjsaN8se8JznOmGLN2p49Pe5U/ttafcs/apA==
 
+js-tiktoken@^1.0.15:
+  version "1.0.15"
+  resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.15.tgz#92a7d829f6950c2cfb35cc52555502e3d6e2ebac"
+  integrity sha512-65ruOWWXDEZHHbAo7EjOcNxOGasQKbL4Fq3jEr2xsCqSsoOo6VVSqzWQb6PRIqypFSDcma4jO90YP0w5X8qVXQ==
+  dependencies:
+    base64-js "^1.5.1"
+
 "js-tokens@^3.0.0 || ^4.0.0", js-tokens@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"