1 год назад · cb56e0c785
--- a/.changeset/config.json
+++ b/.changeset/config.json
@@ -15,6 +15,7 @@
 
															     "@growi/app",
														
 
															     "@growi/slackbot-proxy",
														
 
															     "@growi/custom-icons",
														
 
															+    "@growi/markdown-splitter",
														
 
															     "@growi/editor",
														
 
															     "@growi/presentation",
														
 
															     "@growi/preset-*",
														
--- a/apps/app/next.config.js
+++ b/apps/app/next.config.js
@@ -73,6 +73,7 @@ const getTranspilePackages = () => {
 
															 const optimizePackageImports = [
														
 
															   '@growi/core',
														
 
															   '@growi/editor',
														
 
															+  '@growi/markdown-splitter',
														
 
															   '@growi/pluginkit',
														
 
															   '@growi/presentation',
														
 
															   '@growi/preset-themes',
														
--- a/apps/app/package.json
+++ b/apps/app/package.json
@@ -222,6 +222,7 @@
 
															     "@growi/core-styles": "link:../../packages/core-styles",
														
 
															     "@growi/custom-icons": "link:../../packages/custom-icons",
														
 
															     "@growi/editor": "link:../../packages/editor",
														
 
															+    "@growi/markdown-splitter": "link:../../packages/markdown-splitter",
														
 
															     "@growi/ui": "link:../../packages/ui",
														
 
															     "@handsontable/react": "=2.1.0",
														
 
															     "@next/bundle-analyzer": "^14.1.3",
														
--- a/packages/markdown-splitter/.eslintignore
+++ b/packages/markdown-splitter/.eslintignore
@@ -0,0 +1,2 @@
 
															+/dist/**
														
 
															+/types/**
														
--- a/packages/markdown-splitter/.eslintrc.cjs
+++ b/packages/markdown-splitter/.eslintrc.cjs
@@ -0,0 +1,5 @@
 
															+module.exports = {
														
 
															+  extends: [
														
 
															+    'weseek/react',
														
 
															+  ],
														
 
															+};
														
--- a/packages/markdown-splitter/.gitignore
+++ b/packages/markdown-splitter/.gitignore
@@ -0,0 +1 @@
 
															+/dist
														
--- a/packages/markdown-splitter/package.json
+++ b/packages/markdown-splitter/package.json
@@ -0,0 +1,43 @@
 
															+{
														
 
															+  "name": "@growi/markdown-splitter",
														
 
															+  "version": "1.0.0",
														
 
															+  "license": "MIT",
														
 
															+  "private": true,
														
 
															+  "type": "module",
														
 
															+  "module": "dist/index.js",
														
 
															+  "types": "dist/index.d.ts",
														
 
															+  "files": [
														
 
															+    "dist"
														
 
															+  ],
														
 
															+  "main": "dist/index.cjs",
														
 
															+  "exports": {
														
 
															+    ".": {
														
 
															+      "import": "./dist/index.js",
														
 
															+      "require": "./dist/index.cjs"
														
 
															+    }
														
 
															+  },
														
 
															+  "scripts": {
														
 
															+    "build": "vite build",
														
 
															+    "clean": "shx rm -rf dist",
														
 
															+    "dev": "vite build --mode dev",
														
 
															+    "watch": "yarn dev -w --emptyOutDir=false",
														
 
															+    "lint:js": "yarn eslint **/*.{js,ts}",
														
 
															+    "lint:typecheck": "tsc",
														
 
															+    "lint": "npm-run-all -p lint:*",
														
 
															+    "test": "vitest run --coverage"
														
 
															+  },
														
 
															+  "devDependencies": {
														
 
															+    "eslint-plugin-regex": "^1.8.0",
														
 
															+    "hast-util-sanitize": "^4.1.0",
														
 
															+    "pako": "^2.1.0",
														
 
															+    "throttle-debounce": "^5.0.0",
														
 
															+    "unified": "^10.1.2",
														
 
															+    "unist-util-visit": "^4.0.0"
														
 
															+  },
														
 
															+  "peerDependencies": {
														
 
															+    "react": "^18.2.0",
														
 
															+    "react-dom": "^18.2.0"
														
 
															+  },
														
 
															+  "dependencies": {
														
 
															+  }
														
 
															+}
														
--- a/packages/markdown-splitter/src/index.ts
+++ b/packages/markdown-splitter/src/index.ts
@@ -0,0 +1 @@
 
															+export * from './services/markdown-splitter';
														
--- a/packages/markdown-splitter/src/services/markdown-splitter.ts
+++ b/packages/markdown-splitter/src/services/markdown-splitter.ts
@@ -0,0 +1,106 @@
 
															+/**
														
 
															+ * A labeled fragment of a Markdown document, as produced by splitMarkdownIntoChunks.
														
 
															+ */
														
 
															+export type Chunk = {
														
 
															+  // Section number joined with '-' plus a '-heading' or '-content' suffix (e.g. '1-2-heading')
														
 
															+  label: string;
														
 
															+  // The heading line, or the newline-joined body text, that the label refers to
														
 
															+  text: string;
														
 
															+};
														
 
															+
														
 
															+/**
														
 
															+ * Joins the buffered content lines with '\n', trims trailing whitespace, and appends the result
														
 
															+ * to the chunks array as a new chunk if it is not empty.
														
 
															+ * Both arrays are mutated in place: contentLines is always emptied afterwards, even when no
														
 
															+ * chunk was added.
														
 
															+ * @param chunks - The array to store chunks.
														
 
															+ * @param contentLines - The array of content lines.
														
 
															+ * @param label - The label for the content chunk.
														
 
															+ */
														
 
															+function processPendingContent(chunks: Chunk[], contentLines: string[], label: string) {
														
 
															+  // trimEnd() removes trailing blank lines and whitespace; leading whitespace is preserved
														
 
															+  const text = contentLines.join('\n').trimEnd();
														
 
															+  if (text !== '') {
														
 
															+    chunks.push({ label, text });
														
 
															+  }
														
 
															+  contentLines.length = 0; // Clear the contentLines array
														
 
															+}
														
 
															+
														
 
															+/**
														
 
															+ * Updates the section numbers based on the heading depth and returns the updated section label.
														
 
															+ * Handles non-consecutive heading levels by initializing missing levels with 1.
														
 
															+ * The sectionNumbers array is mutated in place, so it carries the numbering state between calls.
														
 
															+ * @param sectionNumbers - The current section numbers, one counter per heading level.
														
 
															+ * @param depth - The depth of the heading (e.g., # is depth 1). Expected to be >= 1, since the
														
 
															+ *   function indexes sectionNumbers[depth - 1].
														
 
															+ * @returns The updated section label: the counters joined with '-' (e.g. "1-2").
														
 
															+ */
														
 
															+function updateSectionNumbers(sectionNumbers: number[], depth: number): string {
														
 
															+  if (depth > sectionNumbers.length) {
														
 
															+    // If depth increases, initialize missing levels with 1
														
 
															+    // (e.g. [1] followed by a depth-3 heading becomes [1, 1, 1])
														
 
															+    while (sectionNumbers.length < depth) {
														
 
															+      sectionNumbers.push(1);
														
 
															+    }
														
 
															+  }
														
 
															+  else if (depth === sectionNumbers.length) {
														
 
															+    // Same level, increment the last number
														
 
															+    sectionNumbers[depth - 1]++;
														
 
															+  }
														
 
															+  else {
														
 
															+    // Depth decreases, remove deeper levels and increment current level
														
 
															+    // (e.g. [1, 1, 1] followed by a depth-2 heading becomes [1, 2])
														
 
															+    sectionNumbers.splice(depth);
														
 
															+    sectionNumbers[depth - 1]++;
														
 
															+  }
														
 
															+  return sectionNumbers.join('-');
														
 
															+}
														
 
															+
														
 
															+/**
														
 
															+ * Splits Markdown text into labeled chunks, considering content that may start before any headers
														
 
															+ * and handling non-consecutive heading levels. Preserves list indentation and leading spaces while
														
 
															+ * reducing unnecessary line breaks. Ensures that no empty line is added between sections.
														
 
															+ *
														
 
															+ * A line counts as a heading only when it is a run of '#' followed by whitespace. Any other line
														
 
															+ * that starts with '#' (e.g. "#hashtag") is kept as ordinary content instead of being dropped.
														
 
															+ * Content found before the first heading is labeled '0-content'.
														
 
															+ * @param markdown - The input Markdown string.
														
 
															+ * @returns An array of labeled chunks.
														
 
															+ */
														
 
															+export function splitMarkdownIntoChunks(markdown: string): Chunk[] {
														
 
															+  const chunks: Chunk[] = [];
														
 
															+  const sectionNumbers: number[] = [];
														
 
															+
														
 
															+  if (typeof markdown !== 'string' || markdown.trim() === '') {
														
 
															+    return chunks;
														
 
															+  }
														
 
															+
														
 
															+  const lines = markdown.split('\n');
														
 
															+  const contentLines: string[] = [];
														
 
															+  let currentLabel = '';
														
 
															+  let previousLineEmpty = false;
														
 
															+
														
 
															+  // Emits the buffered content, if any, under the label of the section it belongs to
														
 
															+  const flushPendingContent = () => {
														
 
															+    if (contentLines.length > 0) {
														
 
															+      const contentLabel = currentLabel !== '' ? `${currentLabel}-content` : '0-content';
														
 
															+      processPendingContent(chunks, contentLines, contentLabel);
														
 
															+    }
														
 
															+  };
														
 
															+
														
 
															+  for (const line of lines) {
														
 
															+    const trimmedLine = line.trim();
														
 
															+
														
 
															+    // Match heading level and text; decide on the full pattern rather than on the leading '#'
														
 
															+    // alone so that non-heading lines such as "#hashtag" are not lost
														
 
															+    const headerMatch = trimmedLine.match(/^(#+)\s+(.*)/);
														
 
															+
														
 
															+    if (headerMatch) {
														
 
															+      // Process any pending content before starting a new section
														
 
															+      flushPendingContent();
														
 
															+
														
 
															+      const headingDepth = headerMatch[1].length;
														
 
															+      currentLabel = updateSectionNumbers(sectionNumbers, headingDepth);
														
 
															+      chunks.push({ label: `${currentLabel}-heading`, text: line });
														
 
															+    }
														
 
															+    else if (trimmedLine === '') {
														
 
															+      // Handle empty lines to avoid multiple consecutive empty lines
														
 
															+      if (!previousLineEmpty && contentLines.length > 0) {
														
 
															+        contentLines.push('');
														
 
															+        previousLineEmpty = true;
														
 
															+      }
														
 
															+    }
														
 
															+    else {
														
 
															+      // Add non-empty lines to the current content
														
 
															+      contentLines.push(line);
														
 
															+      previousLineEmpty = false;
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  // Process any remaining content after the last line
														
 
															+  flushPendingContent();
														
 
															+
														
 
															+  return chunks;
														
 
															+}
														
--- a/packages/markdown-splitter/test/index.spec.ts
+++ b/packages/markdown-splitter/test/index.spec.ts
@@ -0,0 +1,252 @@
 
															+import type { Chunk } from '../src/services/markdown-splitter';
														
 
															+import { splitMarkdownIntoChunks } from '../src/services/markdown-splitter';
														
 
															+
														
 
															+describe('splitMarkdownIntoChunks', () => {
														
 
															+
														
 
															+  test('handles empty markdown string', () => {
														
 
															+    const markdown = '';
														
 
															+    const expected: Chunk[] = [];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('handles markdown with only content and no headers', () => {
														
 
															+    const markdown = `This is some content without any headers.
														
 
															+It spans multiple lines.
														
 
															+
														
 
															+Another paragraph.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      {
														
 
															+        label: '0-content',
														
 
															+        text: 'This is some content without any headers.\nIt spans multiple lines.\n\nAnother paragraph.',
														
 
															+      },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('handles markdown starting with a header', () => {
														
 
															+    const markdown = `
														
 
															+# Header 1
														
 
															+Content under header 1.
														
 
															+
														
 
															+## Header 1.1
														
 
															+Content under header 1.1.
														
 
															+
														
 
															+# Header 2
														
 
															+Content under header 2.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      { label: '1-heading', text: '# Header 1' },
														
 
															+      { label: '1-content', text: 'Content under header 1.' },
														
 
															+      { label: '1-1-heading', text: '## Header 1.1' },
														
 
															+      { label: '1-1-content', text: 'Content under header 1.1.' },
														
 
															+      { label: '2-heading', text: '# Header 2' },
														
 
															+      { label: '2-content', text: 'Content under header 2.' },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('handles markdown with non-consecutive heading levels', () => {
														
 
															+    const markdown = `
														
 
															+Introduction without a header.
														
 
															+
														
 
															+# Chapter 1
														
 
															+Content of chapter 1.
														
 
															+
														
 
															+### Section 1.1.1
														
 
															+Content of section 1.1.1.
														
 
															+
														
 
															+## Section 1.2
														
 
															+Content of section 1.2.
														
 
															+
														
 
															+# Chapter 2
														
 
															+Content of chapter 2.
														
 
															+
														
 
															+## Section 2.1
														
 
															+Content of section 2.1.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      {
														
 
															+        label: '0-content',
														
 
															+        text: 'Introduction without a header.',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '1-heading',
														
 
															+        text: '# Chapter 1',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '1-content',
														
 
															+        text: 'Content of chapter 1.',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '1-1-1-heading',
														
 
															+        text: '### Section 1.1.1',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '1-1-1-content',
														
 
															+        text: 'Content of section 1.1.1.',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '1-2-heading',
														
 
															+        text: '## Section 1.2',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '1-2-content',
														
 
															+        text: 'Content of section 1.2.',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '2-heading',
														
 
															+        text: '# Chapter 2',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '2-content',
														
 
															+        text: 'Content of chapter 2.',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '2-1-heading',
														
 
															+        text: '## Section 2.1',
														
 
															+      },
														
 
															+      {
														
 
															+        label: '2-1-content',
														
 
															+        text: 'Content of section 2.1.',
														
 
															+      },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('handles markdown with skipped heading levels', () => {
														
 
															+    const markdown = `
														
 
															+# Header 1
														
 
															+Content under header 1.
														
 
															+
														
 
															+#### Header 1.1.1.1
														
 
															+Content under header 1.1.1.1.
														
 
															+
														
 
															+## Header 1.2
														
 
															+Content under header 1.2.
														
 
															+
														
 
															+# Header 2
														
 
															+Content under header 2.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      { label: '1-heading', text: '# Header 1' },
														
 
															+      { label: '1-content', text: 'Content under header 1.' },
														
 
															+      { label: '1-1-1-1-heading', text: '#### Header 1.1.1.1' },
														
 
															+      { label: '1-1-1-1-content', text: 'Content under header 1.1.1.1.' },
														
 
															+      { label: '1-2-heading', text: '## Header 1.2' },
														
 
															+      { label: '1-2-content', text: 'Content under header 1.2.' },
														
 
															+      { label: '2-heading', text: '# Header 2' },
														
 
															+      { label: '2-content', text: 'Content under header 2.' },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  // NOTE(review): despite the test name, every heading in this input is well-formed; it checks a
														
 
															+  // depth-4 heading directly under a depth-1 heading. No malformed heading (e.g. "#text") is covered.
														
 
															+  test('handles malformed headings', () => {
														
 
															+    const markdown = `
														
 
															+# Header 1
														
 
															+Content under header 1.
														
 
															+
														
 
															+#### Header 1.1.1.1
														
 
															+Content under header 1.1.1.1.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      { label: '1-heading', text: '# Header 1' },
														
 
															+      { label: '1-content', text: 'Content under header 1.' },
														
 
															+      { label: '1-1-1-1-heading', text: '#### Header 1.1.1.1' },
														
 
															+      { label: '1-1-1-1-content', text: 'Content under header 1.1.1.1.' },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('handles multiple content blocks before any headers', () => {
														
 
															+    const markdown = `
														
 
															+This is the first paragraph without a header.
														
 
															+
														
 
															+This is the second paragraph without a header.
														
 
															+
														
 
															+# Header 1
														
 
															+Content under header 1.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      {
														
 
															+        label: '0-content',
														
 
															+        text: 'This is the first paragraph without a header.\n\nThis is the second paragraph without a header.',
														
 
															+      },
														
 
															+      { label: '1-heading', text: '# Header 1' },
														
 
															+      { label: '1-content', text: 'Content under header 1.' },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('handles markdown with only headers and no content', () => {
														
 
															+    const markdown = `
														
 
															+# Header 1
														
 
															+
														
 
															+## Header 1.1
														
 
															+
														
 
															+### Header 1.1.1
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      { label: '1-heading', text: '# Header 1' },
														
 
															+      { label: '1-1-heading', text: '## Header 1.1' },
														
 
															+      { label: '1-1-1-heading', text: '### Header 1.1.1' },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('handles markdown with mixed content and headers', () => {
														
 
															+    const markdown = `
														
 
															+# Header 1
														
 
															+Content under header 1.
														
 
															+
														
 
															+## Header 1.1
														
 
															+Content under header 1.1.
														
 
															+Another piece of content.
														
 
															+
														
 
															+# Header 2
														
 
															+Content under header 2.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      { label: '1-heading', text: '# Header 1' },
														
 
															+      { label: '1-content', text: 'Content under header 1.' },
														
 
															+      { label: '1-1-heading', text: '## Header 1.1' },
														
 
															+      { label: '1-1-content', text: 'Content under header 1.1.\nAnother piece of content.' },
														
 
															+      { label: '2-heading', text: '# Header 2' },
														
 
															+      { label: '2-content', text: 'Content under header 2.' },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+  test('preserves list indentation and reduces unnecessary line breaks', () => {
														
 
															+    const markdown = `
														
 
															+# Header 1
														
 
															+Content under header 1.
														
 
															+
														
 
															+- Item 1
														
 
															+  - Subitem 1
														
 
															+- Item 2
														
 
															+
														
 
															+
														
 
															+# Header 2
														
 
															+Content under header 2.
														
 
															+    `;
														
 
															+    const expected: Chunk[] = [
														
 
															+      { label: '1-heading', text: '# Header 1' },
														
 
															+      { label: '1-content', text: 'Content under header 1.\n\n- Item 1\n  - Subitem 1\n- Item 2' },
														
 
															+      { label: '2-heading', text: '# Header 2' },
														
 
															+      { label: '2-content', text: 'Content under header 2.' },
														
 
															+    ];
														
 
															+    const result = splitMarkdownIntoChunks(markdown);
														
 
															+    expect(result).toEqual(expected);
														
 
															+  });
														
 
															+
														
 
															+});
														
--- a/packages/markdown-splitter/tsconfig.json
+++ b/packages/markdown-splitter/tsconfig.json
@@ -0,0 +1,16 @@
 
															+{
														
 
															+  "$schema": "http://json.schemastore.org/tsconfig",
														
 
															+  "extends": "../../tsconfig.base.json",
														
 
															+  "compilerOptions": {
														
 
															+    "baseUrl": ".",
														
 
															+    "paths": {
														
 
															+      "~/*": ["./src/*"]
														
 
															+    },
														
 
															+    "types": [
														
 
															+      "vitest/globals"
														
 
															+    ]
														
 
															+  },
														
 
															+  "include": [
														
 
															+    "src", "test"
														
 
															+  ]
														
 
															+}
														
--- a/packages/markdown-splitter/vite.config.ts
+++ b/packages/markdown-splitter/vite.config.ts
@@ -0,0 +1,39 @@
 
															+import path from 'path';
														
 
															+
														
 
															+import glob from 'glob';
														
 
															+import { nodeExternals } from 'rollup-plugin-node-externals';
														
 
															+import { defineConfig } from 'vite';
														
 
															+import dts from 'vite-plugin-dts';
														
 
															+
														
 
															+// https://vitejs.dev/config/
														
 
															+export default defineConfig({
														
 
															+  plugins: [
														
 
															+    dts({
														
 
															+      copyDtsFiles: true,
														
 
															+    }),
														
 
															+    {
														
 
															+      ...nodeExternals({
														
 
															+        devDeps: true,
														
 
															+        builtinsPrefix: 'ignore',
														
 
															+      }),
														
 
															+      enforce: 'pre',
														
 
															+    },
														
 
															+  ],
														
 
															+  build: {
														
 
															+    outDir: 'dist',
														
 
															+    sourcemap: true,
														
 
															+    lib: {
														
 
															+      entry: glob.sync(path.resolve(__dirname, 'src/**/*.ts'), {
														
 
															+        ignore: '**/*.spec.ts',
														
 
															+      }),
														
 
															+      name: 'core-libs',
														
 
															+      formats: ['es', 'cjs'],
														
 
															+    },
														
 
															+    rollupOptions: {
														
 
															+      output: {
														
 
															+        preserveModules: true,
														
 
															+        preserveModulesRoot: 'src',
														
 
															+      },
														
 
															+    },
														
 
															+  },
														
 
															+});
														
--- a/packages/markdown-splitter/vitest.config.ts
+++ b/packages/markdown-splitter/vitest.config.ts
@@ -0,0 +1,25 @@
 
															+import tsconfigPaths from 'vite-tsconfig-paths';
														
 
															+import { defineConfig, coverageConfigDefaults } from 'vitest/config';
														
 
															+
														
 
															+export default defineConfig({
														
 
															+  plugins: [
														
 
															+    tsconfigPaths(),
														
 
															+  ],
														
 
															+  test: {
														
 
															+    environment: 'node',
														
 
															+    clearMocks: true,
														
 
															+    globals: true,
														
 
															+    coverage: {
														
 
															+      exclude: [
														
 
															+        ...coverageConfigDefaults.exclude,
														
 
															+        'src/**/index.ts',
														
 
															+      ],
														
 
															+      thresholds: {
														
 
															+        statements: 100,
														
 
															+        branches: 100,
														
 
															+        lines: 100,
														
 
															+        functions: 100,
														
 
															+      },
														
 
															+    },
														
 
															+  },
														
 
															+});
														
--- a/yarn.lock
+++ b/yarn.lock
@@ -2157,6 +2157,9 @@
 
															     react "^18.2.0"
														
 
															     react-dom "^18.2.0"
														
 
															+"@growi/markdown-splitter@link:packages/markdown-splitter":
														
 
															+  version "1.0.0"
														
 
															+
														
 
															 "@growi/pluginkit@link:packages/pluginkit":
														
 
															   version "1.0.1"
														
 
															   dependencies:
	`@@ -0,0 +1 @@`
			`+export * from './services/markdown-splitter';`