|
@@ -1,3 +1,6 @@
|
|
|
|
|
+import yaml from 'js-yaml';
|
|
|
|
|
+import remarkFrontmatter from 'remark-frontmatter'; // Frontmatter processing
|
|
|
|
|
+import remarkGfm from 'remark-gfm'; // GFM processing
|
|
|
import remarkParse from 'remark-parse';
|
|
import remarkParse from 'remark-parse';
|
|
|
import type { Options as StringifyOptions } from 'remark-stringify';
|
|
import type { Options as StringifyOptions } from 'remark-stringify';
|
|
|
import remarkStringify from 'remark-stringify';
|
|
import remarkStringify from 'remark-stringify';
|
|
@@ -58,45 +61,55 @@ function updateSectionNumbers(sectionNumbers: number[], headingDepth: number): s
|
|
|
export async function splitMarkdownIntoChunks(markdownText: string): Promise<Chunk[]> {
|
|
export async function splitMarkdownIntoChunks(markdownText: string): Promise<Chunk[]> {
|
|
|
const chunks: Chunk[] = [];
|
|
const chunks: Chunk[] = [];
|
|
|
const sectionNumbers: number[] = [];
|
|
const sectionNumbers: number[] = [];
|
|
|
|
|
+ let frontmatter: Record<string, unknown> | null = null; // Variable to store frontmatter
|
|
|
|
|
+ const contentBuffer: string[] = [];
|
|
|
|
|
+ let currentSectionLabel = '';
|
|
|
|
|
|
|
|
if (typeof markdownText !== 'string' || markdownText.trim() === '') {
|
|
if (typeof markdownText !== 'string' || markdownText.trim() === '') {
|
|
|
return chunks;
|
|
return chunks;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- const parser = unified().use(remarkParse);
|
|
|
|
|
|
|
+ const parser = unified()
|
|
|
|
|
+ .use(remarkParse)
|
|
|
|
|
+ .use(remarkFrontmatter, ['yaml'])
|
|
|
|
|
+ .use(remarkGfm); // Enable GFM extensions
|
|
|
|
|
|
|
|
const stringifyOptions: StringifyOptions = {
|
|
const stringifyOptions: StringifyOptions = {
|
|
|
bullet: '-', // Set list bullet to hyphen
|
|
bullet: '-', // Set list bullet to hyphen
|
|
|
rule: '-', // Use hyphen for horizontal rules
|
|
rule: '-', // Use hyphen for horizontal rules
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
- // Create stringifier once
|
|
|
|
|
- const stringifier = unified().use(remarkStringify, stringifyOptions);
|
|
|
|
|
|
|
+ const stringifier = unified()
|
|
|
|
|
+ .use(remarkFrontmatter, ['yaml'])
|
|
|
|
|
+ .use(remarkGfm)
|
|
|
|
|
+ .use(remarkStringify, stringifyOptions);
|
|
|
|
|
|
|
|
const parsedTree = parser.parse(markdownText);
|
|
const parsedTree = parser.parse(markdownText);
|
|
|
|
|
|
|
|
- const contentBuffer: string[] = [];
|
|
|
|
|
- let currentSectionLabel = '';
|
|
|
|
|
-
|
|
|
|
|
- const markdownNodes = parsedTree.children;
|
|
|
|
|
-
|
|
|
|
|
- for (const node of markdownNodes) {
|
|
|
|
|
- if (node.type === 'heading') {
|
|
|
|
|
|
|
+ // Iterate over top-level nodes to prevent duplication
|
|
|
|
|
+ for (const node of parsedTree.children) {
|
|
|
|
|
+ if (node.type === 'yaml') {
|
|
|
|
|
+ frontmatter = yaml.load(node.value) as Record<string, unknown>;
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (node.type === 'heading') {
|
|
|
// Process pending content before heading
|
|
// Process pending content before heading
|
|
|
if (contentBuffer.length > 0) {
|
|
if (contentBuffer.length > 0) {
|
|
|
const contentLabel = currentSectionLabel !== '' ? `${currentSectionLabel}-content` : '0-content';
|
|
const contentLabel = currentSectionLabel !== '' ? `${currentSectionLabel}-content` : '0-content';
|
|
|
addContentChunk(chunks, contentBuffer, contentLabel);
|
|
addContentChunk(chunks, contentBuffer, contentLabel);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- const headingDepth = node.depth as number;
|
|
|
|
|
|
|
+ const headingDepth = node.depth;
|
|
|
currentSectionLabel = updateSectionNumbers(sectionNumbers, headingDepth);
|
|
currentSectionLabel = updateSectionNumbers(sectionNumbers, headingDepth);
|
|
|
|
|
|
|
|
- const headingMarkdown = stringifier.stringify(node as any); // eslint-disable-line @typescript-eslint/no-explicit-any
|
|
|
|
|
|
|
+ const headingMarkdown = stringifier.stringify(node as any);// eslint-disable-line @typescript-eslint/no-explicit-any
|
|
|
chunks.push({ label: `${currentSectionLabel}-heading`, text: headingMarkdown.trim() });
|
|
chunks.push({ label: `${currentSectionLabel}-heading`, text: headingMarkdown.trim() });
|
|
|
}
|
|
}
|
|
|
else {
|
|
else {
|
|
|
- const contentMarkdown = stringifier.stringify(node as any);// eslint-disable-line @typescript-eslint/no-explicit-any
|
|
|
|
|
- contentBuffer.push(contentMarkdown.trim());
|
|
|
|
|
|
|
+ // Add non-heading content to the buffer
|
|
|
|
|
+ const contentMarkdown = stringifier.stringify(node as any).trim(); // eslint-disable-line @typescript-eslint/no-explicit-any
|
|
|
|
|
+ if (contentMarkdown !== '') {
|
|
|
|
|
+ contentBuffer.push(contentMarkdown);
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -106,5 +119,12 @@ export async function splitMarkdownIntoChunks(markdownText: string): Promise<Chu
|
|
|
addContentChunk(chunks, contentBuffer, contentLabel);
|
|
addContentChunk(chunks, contentBuffer, contentLabel);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ if (frontmatter) {
|
|
|
|
|
+ chunks.unshift({
|
|
|
|
|
+ label: 'frontmatter',
|
|
|
|
|
+ text: JSON.stringify(frontmatter, null, 2),
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
return chunks;
|
|
return chunks;
|
|
|
}
|
|
}
|