nHigashiWeseek 1 год назад
Родитель
Commit
a13e44a870

+ 1 - 1
.changeset/config.json

@@ -15,7 +15,7 @@
     "@growi/app",
     "@growi/slackbot-proxy",
     "@growi/custom-icons",
-    "@growi/markdown-token-splitter",
+    "@growi/markdown-splitter",
     "@growi/editor",
     "@growi/presentation",
     "@growi/preset-*",

+ 1 - 1
apps/app/next.config.js

@@ -65,7 +65,7 @@ const getTranspilePackages = () => {
 const optimizePackageImports = [
   '@growi/core',
   '@growi/editor',
-  '@growi/markdown-token-splitter',
+  '@growi/markdown-splitter',
   '@growi/pluginkit',
   '@growi/presentation',
   '@growi/preset-themes',

+ 1 - 1
apps/app/package.json

@@ -220,7 +220,7 @@
   "devDependencies": {
     "@growi/core-styles": "link:../../packages/core-styles",
     "@growi/custom-icons": "link:../../packages/custom-icons",
-    "@growi/markdown-token-splitter": "link:../../packages/markdown-token-splitter",
+    "@growi/markdown-splitter": "link:../../packages/markdown-splitter",
     "@growi/editor": "link:../../packages/editor",
     "@growi/ui": "link:../../packages/ui",
     "@handsontable/react": "=2.1.0",

+ 0 - 0
packages/markdown-token-splitter/.eslintignore → packages/markdown-splitter/.eslintignore


+ 0 - 0
packages/markdown-token-splitter/.eslintrc.cjs → packages/markdown-splitter/.eslintrc.cjs


+ 0 - 0
packages/markdown-token-splitter/.gitignore → packages/markdown-splitter/.gitignore


+ 1 - 1
packages/markdown-token-splitter/package.json → packages/markdown-splitter/package.json

@@ -1,5 +1,5 @@
 {
-  "name": "@growi/markdown-token-splitter",
+  "name": "@growi/markdown-splitter",
   "version": "1.0.0",
   "license": "MIT",
   "private": "true",

+ 1 - 0
packages/markdown-splitter/src/index.ts

@@ -0,0 +1 @@
+export * from './services/markdown-splitter';

+ 2 - 1
packages/markdown-token-splitter/src/services/markdown-token-splitter.ts → packages/markdown-splitter/src/services/markdown-splitter.ts

@@ -1,4 +1,5 @@
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
+import type { Document } from 'langchain/document';
 
 /**
  * Function to recursively split a markdown string by header sections (and within subsections if they exceed the specified max token count).
@@ -7,7 +8,7 @@ import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
  * @param chunkSize - The chunk size for splitting (default is 1000)
  * @returns An array of split markdown sections
  */
-export async function splitMarkdownByTokens(
+export async function splitMarkdownIntoChunks(
     markdownString: string,
     chunkSize = 1000, // Default chunk size set to 1000
 ): Promise<Document[]> {

+ 9 - 9
packages/markdown-token-splitter/test/index.test.js → packages/markdown-splitter/test/index.test.js

@@ -1,8 +1,8 @@
 import { describe, it, expect } from 'vitest';
 
-import { splitMarkdownByTokens } from '../src/services/markdown-token-splitter';
+import { splitMarkdownIntoChunks } from '../src/services/markdown-splitter';
 
-describe('splitMarkdownByTokens', () => {
+describe('splitMarkdownIntoChunks', () => {
 
   it('should split markdown into sections using the specified chunk size', async() => {
     const markdown = `
@@ -19,7 +19,7 @@ This is some content under heading 3.
 This is some content under heading 4.
 `;
     const chunkSize = 60;
-    const result = await splitMarkdownByTokens(markdown, chunkSize);
+    const result = await splitMarkdownIntoChunks(markdown, chunkSize);
 
     // Expect the result to have more than one section due to chunkSize limitations
     expect(result.length).toBeGreaterThan(1);
@@ -33,7 +33,7 @@ This is some content under heading 4.
 This is some content without any headers. It should not be split unless it exceeds the chunk size.
 `;
     const chunkSize = 100;
-    const result = await splitMarkdownByTokens(markdown, chunkSize);
+    const result = await splitMarkdownIntoChunks(markdown, chunkSize);
 
     // Since the content is short, expect no splits
     expect(result.length).toBe(1);
@@ -46,7 +46,7 @@ This is some content without any headers. It should not be split unless it excee
 ${'This is some repetitive content. '.repeat(50)}
 `;
     const chunkSize = 100;
-    const result = await splitMarkdownByTokens(markdown, chunkSize);
+    const result = await splitMarkdownIntoChunks(markdown, chunkSize);
 
     expect(result.length).toBeGreaterThan(1);
     for (const section of result) {
@@ -57,7 +57,7 @@ ${'This is some repetitive content. '.repeat(50)}
   it('should handle empty markdown input', async() => {
     const markdown = '';
     const chunkSize = 10;
-    const result = await splitMarkdownByTokens(markdown, chunkSize);
+    const result = await splitMarkdownIntoChunks(markdown, chunkSize);
 
     // Expect an empty result for empty markdown input
     expect(result.length).toBe(0);
@@ -75,7 +75,7 @@ Content under subheading 1.1.
 Content under heading 2.
 `;
     const chunkSize = 50;
-    const result = await splitMarkdownByTokens(markdown, chunkSize);
+    const result = await splitMarkdownIntoChunks(markdown, chunkSize);
 
     // Expect multiple sections
     expect(result.length).toBeGreaterThan(1);
@@ -90,7 +90,7 @@ Content under heading 2.
 Short content.
 `;
     const chunkSize = 100;
-    const result = await splitMarkdownByTokens(markdown, chunkSize);
+    const result = await splitMarkdownIntoChunks(markdown, chunkSize);
 
     // Expect the result to be a single section since the content is small
     expect(result.length).toBe(1);
@@ -108,7 +108,7 @@ Short content.
 # Heading 4
 `;
     const chunkSize = 50;
-    const result = await splitMarkdownByTokens(markdown, chunkSize);
+    const result = await splitMarkdownIntoChunks(markdown, chunkSize);
 
     // Expect each heading to be treated as a separate section
     expect(result.length).toBeGreaterThan(1);

+ 0 - 0
packages/markdown-token-splitter/tsconfig.json → packages/markdown-splitter/tsconfig.json


+ 0 - 0
packages/markdown-token-splitter/vite.config.ts → packages/markdown-splitter/vite.config.ts


+ 0 - 1
packages/markdown-token-splitter/src/index.ts

@@ -1 +0,0 @@
-export * from './services/markdown-token-splitter';

+ 1 - 1
yarn.lock

@@ -2179,7 +2179,7 @@
     react "^18.2.0"
     react-dom "^18.2.0"
 
-"@growi/markdown-token-splitter@link:packages/markdown-token-splitter":
+"@growi/markdown-splitter@link:packages/markdown-splitter":
   version "1.0.0"
   dependencies:
     "@langchain/core" "^0.3.5"