nHigashiWeseek 1 год назад
Родитель
Commit
4f5d584808

+ 11 - 11
packages/markdown-splitter/src/services/markdown-splitter.ts

@@ -8,9 +8,9 @@ import type { Options as StringifyOptions } from 'remark-stringify';
 import remarkStringify from 'remark-stringify';
 import { unified } from 'unified';
 
-export type Chunk = {
+export type MarkdownFragment = {
   label: string;
-  type?: string;
+  type: string;
   text: string;
   tokenCount: number;
 };
@@ -42,19 +42,19 @@ function updateSectionNumbers(sectionNumbers: number[], headingDepth: number): s
 }
 
 /**
- * Splits Markdown text into labeled chunks using remark-parse and remark-stringify,
+ * Splits Markdown text into labeled markdownFragments using remark-parse and remark-stringify,
  * processing each content node separately and labeling them as 1-content-1, 1-content-2, etc.
  * @param markdownText - The input Markdown string.
- * @returns An array of labeled chunks.
+ * @returns An array of labeled markdownFragments.
  */
-export async function splitMarkdownIntoChunks(markdownText: string, model: TiktokenModel): Promise<Chunk[]> {
-  const chunks: Chunk[] = [];
+export async function splitMarkdownIntoFragments(markdownText: string, model: TiktokenModel): Promise<MarkdownFragment[]> {
+  const markdownFragments: MarkdownFragment[] = [];
   const sectionNumbers: number[] = [];
   let currentSectionLabel = '';
   const contentCounters: Record<string, number> = {};
 
   if (typeof markdownText !== 'string' || markdownText.trim() === '') {
-    return chunks;
+    return markdownFragments;
   }
 
   const encoder = encodingForModel(model);
@@ -83,7 +83,7 @@ export async function splitMarkdownIntoChunks(markdownText: string, model: Tikto
       const frontmatter = yaml.load(node.value) as Record<string, unknown>;
       const frontmatterText = JSON.stringify(frontmatter, null, 2);
       const tokenCount = encoder.encode(frontmatterText).length;
-      chunks.push({
+      markdownFragments.push({
         label: 'frontmatter',
         type: 'yaml',
         text: frontmatterText,
@@ -96,7 +96,7 @@ export async function splitMarkdownIntoChunks(markdownText: string, model: Tikto
 
       const headingMarkdown = stringifier.stringify(node as any).trim(); // eslint-disable-line @typescript-eslint/no-explicit-any
       const tokenCount = encoder.encode(headingMarkdown).length;
-      chunks.push({
+      markdownFragments.push({
         label: `${currentSectionLabel}-heading`, type: node.type, text: headingMarkdown, tokenCount,
       });
     }
@@ -115,12 +115,12 @@ export async function splitMarkdownIntoChunks(markdownText: string, model: Tikto
           ? `${currentSectionLabel}-content-${contentCounters[contentCountKey]}`
           : `0-content-${contentCounters[contentCountKey]}`;
         const tokenCount = encoder.encode(contentMarkdown).length;
-        chunks.push({
+        markdownFragments.push({
           label: contentLabel, type: node.type, text: contentMarkdown, tokenCount,
         });
       }
     }
   }
 
-  return chunks;
+  return markdownFragments;
 }

+ 60 - 76
packages/markdown-splitter/src/services/markdown-token-splitter.ts

@@ -1,14 +1,14 @@
 import type { TiktokenModel } from 'js-tiktoken';
 
-import { splitMarkdownIntoChunks, type Chunk } from './markdown-splitter';
+import { splitMarkdownIntoFragments, type MarkdownFragment } from './markdown-splitter';
 
-type GroupedChunks = { [prefix: string]: Chunk[] };
+type MarkdownFragmentGroups = MarkdownFragment[][] ;
 
 function assembleMarkdownRecursively(
-    chunks: Chunk[],
+    markdownFragments: MarkdownFragment[],
     maxToken: number,
-): GroupedChunks {
-  const labels = chunks.map(chunk => chunk.label);
+): MarkdownFragmentGroups {
+  const labels = markdownFragments.map(fragment => fragment.label);
 
   // Get a list of unique prefixes
   const uniquePrefixes: string[] = [...new Set(labels.map((label) => {
@@ -21,7 +21,7 @@ function assembleMarkdownRecursively(
 
 
   // Group chunks by prefix
-  const groupedChunks: GroupedChunks = {};
+  const fragmentGroupes: MarkdownFragmentGroups = [];
   let remainingPrefixes = [...uniquePrefixes];
 
   // Process chunks so that the total token count per level doesn't exceed maxToken
@@ -31,65 +31,65 @@ function assembleMarkdownRecursively(
 
     if (!hasNextLevelPrefix) {
       // If there is no prefix that starts with the current prefix, group the chunks directly
-      let strictMatchingChunks = chunks.filter(chunk => chunk.label === prefix);
+      let matchingFragments = markdownFragments.filter(fragment => fragment.label === prefix);
 
       // Add parent heading if it exists
       const parts = prefix.split('-');
       for (let i = 1; i < parts.length; i++) {
         const parentPrefix = parts.slice(0, i).join('-');
-        const parentHeading = chunks.find(chunk => chunk.label === `${parentPrefix}-heading`);
+        const parentHeading = markdownFragments.find(fragment => fragment.label === `${parentPrefix}-heading`);
         if (parentHeading) {
-          strictMatchingChunks = [parentHeading, ...strictMatchingChunks]; // Add the heading at the front
+          matchingFragments = [parentHeading, ...matchingFragments]; // Add the heading at the front
         }
       }
 
-      groupedChunks[prefix] = strictMatchingChunks;
+      fragmentGroupes.push(matchingFragments);
     }
     else {
       // Filter chunks that start with the current prefix
-      let matchingChunks = chunks.filter(chunk => chunk.label.startsWith(prefix));
+      let matchingFragments = markdownFragments.filter(fragment => fragment.label.startsWith(prefix));
 
       // Add parent heading if it exists
       const parts = prefix.split('-');
       for (let i = 1; i < parts.length; i++) {
         const parentPrefix = parts.slice(0, i).join('-');
-        const parentHeading = chunks.find(chunk => chunk.label === `${parentPrefix}-heading`);
+        const parentHeading = markdownFragments.find(fragment => fragment.label === `${parentPrefix}-heading`);
         if (parentHeading) {
-          matchingChunks = [parentHeading, ...matchingChunks];
+          matchingFragments = [parentHeading, ...matchingFragments];
         }
       }
 
       // Calculate total token count including parent headings
-      const totalTokenCount = matchingChunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0);
+      const totalTokenCount = matchingFragments.reduce((sum, fragment) => sum + fragment.tokenCount, 0);
 
       // If the total token count doesn't exceed maxToken, group the chunks
       if (totalTokenCount <= maxToken) {
-        groupedChunks[prefix] = matchingChunks;
+        fragmentGroupes.push(matchingFragments);
         remainingPrefixes = remainingPrefixes.filter(p => !p.startsWith(`${prefix}-`));
       }
       else {
         // If it exceeds maxToken, strictly filter chunks by the exact numeric prefix
-        const strictMatchingChunks = chunks.filter((chunk) => {
-          const match = chunk.label.match(/^\d+(-\d+)*(?=-)/);
+        const strictMatchingFragments = markdownFragments.filter((fragment) => {
+          const match = fragment.label.match(/^\d+(-\d+)*(?=-)/);
           return match && match[0] === prefix;
         });
 
         // Add parent heading if it exists
         for (let i = 1; i < parts.length; i++) {
           const parentPrefix = parts.slice(0, i).join('-');
-          const parentHeading = chunks.find(chunk => chunk.label === `${parentPrefix}-heading`);
+          const parentHeading = markdownFragments.find(fragment => fragment.label === `${parentPrefix}-heading`);
           if (parentHeading) {
-            strictMatchingChunks.unshift(parentHeading); // Add the heading at the front
+            strictMatchingFragments.unshift(parentHeading); // Add the heading at the front
           }
         }
 
-        groupedChunks[prefix] = strictMatchingChunks;
+        fragmentGroupes.push(strictMatchingFragments);
       }
     }
     remainingPrefixes.shift();
   }
 
-  return groupedChunks;
+  return fragmentGroupes;
 }
 
 // Function to group markdown into chunks based on token count
@@ -97,104 +97,88 @@ export async function assembleMarkdownIntoChunk(
     markdownText: string,
     model = 'gpt-4' as TiktokenModel,
     maxToken = 800,
-): Promise<GroupedChunks> {
+): Promise<string[]> {
   // Split markdown text into chunks
-  const chunks = await splitMarkdownIntoChunks(markdownText, model);
+  const markdownFragments = await splitMarkdownIntoFragments(markdownText, model);
+  const chunks = [] as string[];
 
   // Group the chunks based on token count
-  const groupedChunks = assembleMarkdownRecursively(chunks, maxToken);
-
-  for (const prefix of Object.keys(groupedChunks)) {
-    const chunks = groupedChunks[prefix];
+  const fragmentGroupes = assembleMarkdownRecursively(markdownFragments, maxToken);
 
+  fragmentGroupes.forEach((fragmentGroupe) => {
     // Calculate the total token count for each group
-    const totalTokenCount = chunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0);
+    const totalTokenCount = fragmentGroupe.reduce((sum, fragment) => sum + fragment.tokenCount, 0);
 
     // If the total token count doesn't exceed maxToken, combine the chunks into one
     if (totalTokenCount <= maxToken) {
-      const combinedContent = chunks.map((chunk, index) => {
-        const nextChunk = chunks[index + 1];
-        if (nextChunk) {
+      const chunk = fragmentGroupe.map((fragment, index) => {
+        const nextFragment = fragmentGroupe[index + 1];
+        if (nextFragment) {
           // If both the current and next chunks are headings, add a single newline
-          if (chunk.type === 'heading' && nextChunk.type === 'heading') {
-            return `${chunk.text}\n`;
+          if (fragment.type === 'heading' && nextFragment.type === 'heading') {
+            return `${fragment.text}\n`;
           }
           // Add two newlines for other cases
-          return `${chunk.text}\n\n`;
+          return `${fragment.text}\n\n`;
         }
-        return chunk.text; // No newlines for the last chunk
+        return fragment.text; // No newlines for the last chunk
       }).join('');
 
-      // Combine into one chunk while maintaining the token count
-      groupedChunks[prefix] = [{
-        label: prefix,
-        text: combinedContent,
-        tokenCount: totalTokenCount,
-      }];
+      chunks.push(chunk);
     }
     else {
       // If the total token count exceeds maxToken, split content
-      const headingChunks = chunks.filter(chunk => chunk.type === 'heading'); // Find all headings
-      const headingText = headingChunks.map(heading => heading.text).join('\n'); // Combine headings with one newline
-
-      const newGroupedChunks = []; // Create a new group of chunks
+      const headingFragments = fragmentGroupe.filter(fragment => fragment.type === 'heading'); // Find all headings
+      const headingText = headingFragments.map(heading => heading.text).join('\n'); // Combine headings with one newline
 
-      for (const chunk of chunks) {
-        if (chunk.label.includes('content')) {
+      for (const fragment of fragmentGroupe) {
+        if (fragment.label.includes('content')) {
           // Combine heading and paragraph content
-          const combinedText = `${headingText}\n\n${chunk.text}`;
-          const combinedTokenCount = headingChunks.reduce((sum, heading) => sum + heading.tokenCount, 0) + chunk.tokenCount;
+          const combinedTokenCount = headingFragments.reduce((sum, heading) => sum + heading.tokenCount, 0) + fragment.tokenCount;
           // Check if headingChunks alone exceed maxToken
-          const headingTokenCount = headingChunks.reduce((sum, heading) => sum + heading.tokenCount, 0);
-          if (headingTokenCount > maxToken) {
+          const headingTokenCount = headingFragments.reduce((sum, heading) => sum + heading.tokenCount, 0);
+
+          if (headingTokenCount > maxToken / 2) {
             console.error(`Heading token count exceeds maxToken. Heading token count: ${headingTokenCount}, maxToken: ${maxToken}`);
             break; // Exit the loop
           }
 
           // If the combined token count exceeds maxToken, split the content by character count
           if (combinedTokenCount > maxToken) {
-            const headingTokenCount = headingChunks.reduce((sum, heading) => sum + heading.tokenCount, 0);
+            const headingTokenCount = headingFragments.reduce((sum, heading) => sum + heading.tokenCount, 0);
             const remainingTokenCount = maxToken - headingTokenCount;
 
             // Calculate the total character count and token count
-            const totalCharCount = chunk.text.length;
-            const totalTokenCount = chunk.tokenCount;
+            const fragmentCharCount = fragment.text.length;
+            const fragmenTokenCount = fragment.tokenCount;
 
             // Calculate the character count for splitting
-            const charCountForSplit = Math.floor((remainingTokenCount / totalTokenCount) * totalCharCount);
+            const charCountForSplit = Math.floor((remainingTokenCount / fragmenTokenCount) * fragmentCharCount);
 
             // Split content based on character count
             const splitContents = [];
-            for (let i = 0; i < chunk.text.length; i += charCountForSplit) {
-              splitContents.push(chunk.text.slice(i, i + charCountForSplit));
+            for (let i = 0; i < fragment.text.length; i += charCountForSplit) {
+              splitContents.push(fragment.text.slice(i, i + charCountForSplit));
             }
 
             // Add each split content to the new group of chunks
-            splitContents.forEach((splitText, i) => {
-              newGroupedChunks.push({
-                label: `${chunk.label}-split-${i + 1}`,
-                text: `${headingText}\n\n${splitText}`,
-                tokenCount: remainingTokenCount,
-                type: 'split',
-              });
+            splitContents.forEach((splitText) => {
+              const chunk = headingText
+                ? `${headingText}\n\n${splitText}`
+                : `${splitText}`;
+              chunks.push(chunk);
             });
           }
           else {
-            // If the combined token count doesn't exceed maxToken, add as-is
-            newGroupedChunks.push({
-              label: chunk.label,
-              text: combinedText,
-              tokenCount: combinedTokenCount,
-              type: 'combined',
-            });
+            const chunk = headingText
+              ? `${headingText}\n\n${fragment.text}`
+              : `${fragment.text}`;
+            chunks.push(chunk);
           }
         }
       }
-
-      // Update grouped chunks with the new group
-      groupedChunks[prefix] = newGroupedChunks;
     }
-  }
+  });
 
-  return groupedChunks;
+  return chunks;
 }

+ 27 - 27
packages/markdown-splitter/test/index.spec.ts

@@ -1,17 +1,17 @@
 import { encodingForModel, type TiktokenModel } from 'js-tiktoken';
 
-import type { Chunk } from '../src/services/markdown-splitter';
-import { splitMarkdownIntoChunks } from '../src/services/markdown-splitter';
+import type { MarkdownFragment } from '../src/services/markdown-splitter';
+import { splitMarkdownIntoFragments } from '../src/services/markdown-splitter';
 
 const MODEL: TiktokenModel = 'gpt-4';
 const encoder = encodingForModel(MODEL);
 
-describe('splitMarkdownIntoChunks', () => {
+describe('splitMarkdownIntoFragments', () => {
 
   test('handles empty markdown string', async() => {
     const markdown = '';
-    const expected: Chunk[] = [];
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const expected: MarkdownFragment[] = [];
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -22,7 +22,7 @@ It spans multiple lines.
 Another paragraph.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '0-content-1',
         type: 'paragraph',
@@ -37,7 +37,7 @@ Another paragraph.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -53,7 +53,7 @@ Content under header 1.1.
 Content under header 2.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '1-heading',
         type: 'heading',
@@ -92,7 +92,7 @@ Content under header 2.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -116,7 +116,7 @@ Content of chapter 2.
 Content of section 2.1.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '0-content-1',
         type: 'paragraph',
@@ -185,7 +185,7 @@ Content of section 2.1.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -204,7 +204,7 @@ Content under header 1.2.
 Content under header 2.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '1-heading',
         type: 'heading',
@@ -255,7 +255,7 @@ Content under header 2.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -268,7 +268,7 @@ Content under header 1.
 Content under header 1.1.1.1.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '1-heading',
         type: 'heading',
@@ -295,7 +295,7 @@ Content under header 1.1.1.1.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -309,7 +309,7 @@ This is the second paragraph without a header.
 Content under header 1.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '0-content-1',
         type: 'paragraph',
@@ -336,7 +336,7 @@ Content under header 1.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -349,7 +349,7 @@ Content under header 1.
 ### Header 1.1.1
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '1-heading',
         type: 'heading',
@@ -370,7 +370,7 @@ Content under header 1.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -387,7 +387,7 @@ Another piece of content.
 Content under header 2.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '1-heading',
         type: 'heading',
@@ -426,7 +426,7 @@ Content under header 2.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -444,7 +444,7 @@ Content under header 1.
 Content under header 2.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '1-heading',
         type: 'heading',
@@ -477,7 +477,7 @@ Content under header 2.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -494,7 +494,7 @@ Additional content.
 Content under header 2.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: '1-heading',
         type: 'heading',
@@ -533,7 +533,7 @@ Content under header 2.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 
@@ -547,7 +547,7 @@ author: John Doe
 Some introductory content.
     `;
 
-    const expected: Chunk[] = [
+    const expected: MarkdownFragment[] = [
       {
         label: 'frontmatter',
         type: 'yaml',
@@ -568,7 +568,7 @@ Some introductory content.
       },
     ];
 
-    const result = await splitMarkdownIntoChunks(markdown, MODEL);
+    const result = await splitMarkdownIntoFragments(markdown, MODEL);
     expect(result).toEqual(expected);
   });
 });