Browse Source

Ignore lines starting with "#" inside fenced code blocks when splitting Markdown into chunks

nHigashiWeseek 1 year ago
parent
commit
0c927d012f
1 changed file with 13 additions and 2 deletions
  1. 13 2
      packages/markdown-splitter/src/services/markdown-splitter.ts

+ 13 - 2
packages/markdown-splitter/src/services/markdown-splitter.ts

@@ -63,11 +63,21 @@ export function splitMarkdownIntoChunks(markdown: string): Chunk[] {
   const contentLines: string[] = [];
   const contentLines: string[] = [];
   let currentLabel = '';
   let currentLabel = '';
   let previousLineEmpty = false;
   let previousLineEmpty = false;
-
+  let inCodeBlock = false;
   for (const line of lines) {
   for (const line of lines) {
     const trimmedLine = line.trim();
     const trimmedLine = line.trim();
 
 
-    if (trimmedLine.startsWith('#')) {
+    if (trimmedLine.startsWith('```')) {
+      inCodeBlock = !inCodeBlock;
+      contentLines.push(line);
+      previousLineEmpty = false;
+    }
+    else if (inCodeBlock) {
+      // Inside code block, add line to content
+      contentLines.push(line);
+      previousLineEmpty = false;
+    }
+    else if (trimmedLine.startsWith('#')) {
       // Process any pending content before starting a new section
       // Process any pending content before starting a new section
       if (contentLines.length > 0) {
       if (contentLines.length > 0) {
         const contentLabel = currentLabel !== '' ? `${currentLabel}-content` : '0-content';
         const contentLabel = currentLabel !== '' ? `${currentLabel}-content` : '0-content';
@@ -81,6 +91,7 @@ export function splitMarkdownIntoChunks(markdown: string): Chunk[] {
         currentLabel = updateSectionNumbers(sectionNumbers, headingDepth);
         currentLabel = updateSectionNumbers(sectionNumbers, headingDepth);
         chunks.push({ label: `${currentLabel}-heading`, text: line });
         chunks.push({ label: `${currentLabel}-heading`, text: line });
       }
       }
+      previousLineEmpty = false;
     }
     }
     else if (trimmedLine === '') {
     else if (trimmedLine === '') {
       // Handle empty lines to avoid multiple consecutive empty lines
       // Handle empty lines to avoid multiple consecutive empty lines