Jelajahi Sumber

Relocate Modules (packages/markdown-splitter -> features/openai)

Shun Miyazawa 1 tahun lalu
induk
melakukan
d7db3a1cdb

+ 0 - 1
.changeset/config.json

@@ -15,7 +15,6 @@
     "@growi/app",
     "@growi/slackbot-proxy",
     "@growi/custom-icons",
-    "@growi/markdown-splitter",
     "@growi/editor",
     "@growi/presentation",
     "@growi/preset-*",

+ 0 - 1
apps/app/next.config.js

@@ -73,7 +73,6 @@ const getTranspilePackages = () => {
 const optimizePackageImports = [
   '@growi/core',
   '@growi/editor',
-  '@growi/markdown-splitter',
   '@growi/pluginkit',
   '@growi/presentation',
   '@growi/preset-themes',

+ 1 - 1
apps/app/package.json

@@ -126,6 +126,7 @@
     "i18next-resources-to-backend": "^1.2.1",
     "is-absolute-url": "^4.0.1",
     "is-iso-date": "^0.0.1",
+    "js-tiktoken": "^1.0.15",
     "ldapjs": "^3.0.2",
     "lucene-query-parser": "^1.2.0",
     "markdown-table": "^3.0.3",
@@ -222,7 +223,6 @@
     "@growi/core-styles": "link:../../packages/core-styles",
     "@growi/custom-icons": "link:../../packages/custom-icons",
     "@growi/editor": "link:../../packages/editor",
-    "@growi/markdown-splitter": "link:../../packages/markdown-splitter",
     "@growi/ui": "link:../../packages/ui",
     "@handsontable/react": "=2.1.0",
     "@next/bundle-analyzer": "^14.1.3",

+ 0 - 0
packages/markdown-splitter/src/services/markdown-splitter.spec.ts → apps/app/src/features/openai/server/services/markdown-splitter/markdown-splitter.spec.ts


+ 0 - 0
packages/markdown-splitter/src/services/markdown-splitter.ts → apps/app/src/features/openai/server/services/markdown-splitter/markdown-splitter.ts


+ 0 - 0
packages/markdown-splitter/src/services/markdown-token-splitter.spec.ts → apps/app/src/features/openai/server/services/markdown-splitter/markdown-token-splitter.spec.ts


+ 0 - 0
packages/markdown-splitter/src/services/markdown-token-splitter.ts → apps/app/src/features/openai/server/services/markdown-splitter/markdown-token-splitter.ts


+ 9 - 0
apps/app/src/features/openai/server/services/openai.ts

@@ -21,6 +21,7 @@ import loggerFactory from '~/utils/logger';
 import { OpenaiServiceTypes } from '../../interfaces/ai';
 
 import { getClient } from './client-delegator';
+import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
 import { oepnaiApiErrorHandler } from './openai-api-error-handler';
 
 const BATCH_SIZE = 100;
@@ -135,6 +136,12 @@ class OpenaiService implements IOpenaiService {
   }
 
   private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
+
+    // const splitMarkdownIntoChunks = (await import('./markdown-splitter/markdown-token-splitter')).splitMarkdownIntoChunks;
+    const chunks = await splitMarkdownIntoChunks(body, 'gpt-4o');
+    console.log('chunks', chunks);
+
+
     const file = await toFile(Readable.from(body), `${pageId}.md`);
     const uploadedFile = await this.client.uploadFile(file);
     return uploadedFile;
@@ -173,6 +180,8 @@ class OpenaiService implements IOpenaiService {
     const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
     const uploadedFileIds = vectorStoreFileRelations.map(data => data.fileIds).flat();
 
+    console.log('uploadedFileIds', uploadedFileIds);
+
     if (uploadedFileIds.length === 0) {
       return;
     }

+ 0 - 2
packages/markdown-splitter/.eslintignore

@@ -1,2 +0,0 @@
-/dist/**
-/types/**

+ 0 - 5
packages/markdown-splitter/.eslintrc.cjs

@@ -1,5 +0,0 @@
-module.exports = {
-  extends: [
-    'weseek/react',
-  ],
-};

+ 0 - 1
packages/markdown-splitter/.gitignore

@@ -1 +0,0 @@
-/dist

+ 0 - 49
packages/markdown-splitter/package.json

@@ -1,49 +0,0 @@
-{
-  "name": "@growi/markdown-splitter",
-  "version": "1.0.0",
-  "license": "MIT",
-  "private": "true",
-  "type": "module",
-  "module": "dist/index.js",
-  "types": "dist/index.d.ts",
-  "files": [
-    "dist"
-  ],
-  "main": "dist/index.cjs",
-  "exports": {
-    ".": {
-      "import": "./dist/index.js",
-      "require": "./dist/index.cjs"
-    }
-  },
-  "scripts": {
-    "build": "vite build",
-    "clean": "shx rm -rf dist",
-    "dev": "vite build --mode dev",
-    "watch": "yarn dev -w --emptyOutDir=false",
-    "lint:js": "yarn eslint **/*.{js,ts}",
-    "lint:typecheck": "tsc",
-    "lint": "npm-run-all -p lint:*",
-    "test": "vitest run --coverage"
-  },
-  "dependencies": {
-    "js-tiktoken": "^1.0.15",
-    "js-yaml": "^4.1.0",
-    "remark-frontmatter": "^5.0.0",
-    "remark-gfm": "^4.0.0",
-    "remark-parse": "^11.0.0",
-    "remark-stringify": "^11.0.0",
-    "unified": "^11.0.0"
-  },
-  "devDependencies": {
-    "@types/js-yaml": "^4.0.9",
-    "eslint-plugin-regex": "^1.8.0",
-    "hast-util-sanitize": "^4.1.0",
-    "pako": "^2.1.0",
-    "throttle-debounce": "^5.0.0"
-  },
-  "peerDependencies": {
-    "react": "^18.2.0",
-    "react-dom": "^18.2.0"
-  }
-}

+ 0 - 2
packages/markdown-splitter/src/index.ts

@@ -1,2 +0,0 @@
-export * from './services/markdown-splitter';
-export * from './services/markdown-token-splitter';

+ 0 - 16
packages/markdown-splitter/tsconfig.json

@@ -1,16 +0,0 @@
-{
-  "$schema": "http://json.schemastore.org/tsconfig",
-  "extends": "../../tsconfig.base.json",
-  "compilerOptions": {
-    "baseUrl": ".",
-    "paths": {
-      "~/*": ["./src/*"]
-    },
-    "types": [
-      "vitest/globals"
-    ]
-  },
-  "include": [
-    "src", "test"
-  ]
-}

+ 0 - 39
packages/markdown-splitter/vite.config.ts

@@ -1,39 +0,0 @@
-import path from 'path';
-
-import glob from 'glob';
-import { nodeExternals } from 'rollup-plugin-node-externals';
-import { defineConfig } from 'vite';
-import dts from 'vite-plugin-dts';
-
-// https://vitejs.dev/config/
-export default defineConfig({
-  plugins: [
-    dts({
-      copyDtsFiles: true,
-    }),
-    {
-      ...nodeExternals({
-        devDeps: true,
-        builtinsPrefix: 'ignore',
-      }),
-      enforce: 'pre',
-    },
-  ],
-  build: {
-    outDir: 'dist',
-    sourcemap: true,
-    lib: {
-      entry: glob.sync(path.resolve(__dirname, 'src/**/*.ts'), {
-        ignore: '**/*.spec.ts',
-      }),
-      name: 'core-libs',
-      formats: ['es', 'cjs'],
-    },
-    rollupOptions: {
-      output: {
-        preserveModules: true,
-        preserveModulesRoot: 'src',
-      },
-    },
-  },
-});

+ 0 - 25
packages/markdown-splitter/vitest.config.ts

@@ -1,25 +0,0 @@
-import tsconfigPaths from 'vite-tsconfig-paths';
-import { defineConfig, coverageConfigDefaults } from 'vitest/config';
-
-export default defineConfig({
-  plugins: [
-    tsconfigPaths(),
-  ],
-  test: {
-    environment: 'node',
-    clearMocks: true,
-    globals: true,
-    coverage: {
-      exclude: [
-        ...coverageConfigDefaults.exclude,
-        'src/**/index.ts',
-      ],
-      thresholds: {
-        statements: 100,
-        branches: 100,
-        lines: 100,
-        functions: 100,
-      },
-    },
-  },
-});

+ 0 - 23
yarn.lock

@@ -2255,17 +2255,6 @@
     react "^18.2.0"
     react-dom "^18.2.0"
 
-"@growi/markdown-splitter@link:packages/markdown-splitter":
-  version "1.0.0"
-  dependencies:
-    js-tiktoken "^1.0.15"
-    js-yaml "^4.1.0"
-    remark-frontmatter "^5.0.0"
-    remark-gfm "^4.0.0"
-    remark-parse "^11.0.0"
-    remark-stringify "^11.0.0"
-    unified "^11.0.0"
-
 "@growi/pluginkit@link:packages/pluginkit":
   version "1.0.1"
   dependencies:
@@ -4604,11 +4593,6 @@
     expect "^29.0.0"
     pretty-format "^29.0.0"
 
-"@types/js-yaml@^4.0.9":
-  version "4.0.9"
-  resolved "https://registry.yarnpkg.com/@types/js-yaml/-/js-yaml-4.0.9.tgz#cd82382c4f902fed9691a2ed79ec68c5898af4c2"
-  integrity sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==
-
 "@types/json-schema@*", "@types/json-schema@^7.0.12", "@types/json-schema@^7.0.5", "@types/json-schema@^7.0.6", "@types/json-schema@^7.0.8", "@types/json-schema@^7.0.9":
   version "7.0.15"
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841"
@@ -10276,13 +10260,6 @@ hast-util-raw@^9.0.0:
     web-namespaces "^2.0.0"
     zwitch "^2.0.0"
 
-hast-util-sanitize@^4.1.0:
-  version "4.1.0"
-  resolved "https://registry.yarnpkg.com/hast-util-sanitize/-/hast-util-sanitize-4.1.0.tgz#d90f8521f5083547095c5c63a7e03150303e0286"
-  integrity sha512-Hd9tU0ltknMGRDv+d6Ro/4XKzBqQnP/EZrpiTbpFYfXv/uOhWeKc+2uajcbEvAEH98VZd7eII2PiXm13RihnLw==
-  dependencies:
-    "@types/hast" "^2.0.0"
-
 hast-util-sanitize@^5.0.0, hast-util-sanitize@^5.0.1:
   version "5.0.1"
   resolved "https://registry.yarnpkg.com/hast-util-sanitize/-/hast-util-sanitize-5.0.1.tgz#8e90068cd68e651c569960b77a1b25076579b4cf"