Просмотр исходного кода

Merge pull request #9904 from weseek/feat/165325-implement-predicate-to-check-file-support-by-vectorstore

feat: Implement predicate to check if file is supported by VectorStore
Yuki Takei 11 месяцев назад
Родитель
Коммит
a7acd412cb

+ 3 - 2
apps/app/package.json

@@ -73,6 +73,7 @@
     "@godaddy/terminus": "^4.9.0",
     "@google-cloud/storage": "^5.8.5",
     "@growi/core": "workspace:^",
+    "@growi/pdf-converter-client": "workspace:^",
     "@growi/pluginkit": "workspace:^",
     "@growi/presentation": "workspace:^",
     "@growi/preset-templates": "workspace:^",
@@ -82,20 +83,20 @@
     "@growi/remark-growi-directive": "workspace:^",
     "@growi/remark-lsx": "workspace:^",
     "@growi/slack": "workspace:^",
-    "@growi/pdf-converter-client": "workspace:^",
     "@keycloak/keycloak-admin-client": "^18.0.0",
     "@opentelemetry/api": "^1.9.0",
     "@opentelemetry/auto-instrumentations-node": "^0.55.1",
     "@opentelemetry/exporter-metrics-otlp-grpc": "^0.57.0",
     "@opentelemetry/exporter-trace-otlp-grpc": "^0.57.0",
     "@opentelemetry/resources": "^1.28.0",
-    "@opentelemetry/semantic-conventions": "^1.28.0",
     "@opentelemetry/sdk-metrics": "^1.28.0",
     "@opentelemetry/sdk-node": "^0.57.0",
     "@opentelemetry/sdk-trace-node": "^1.28.0",
+    "@opentelemetry/semantic-conventions": "^1.28.0",
     "@slack/web-api": "^6.2.4",
     "@slack/webhook": "^6.0.0",
     "@types/async": "^3.2.24",
+    "@types/multer": "^1.4.12",
     "JSONStream": "^1.3.5",
     "archiver": "^5.3.0",
     "array.prototype.flatmap": "^1.2.2",

+ 47 - 0
apps/app/src/features/openai/server/utils/is-vector-store-compatible.ts

@@ -0,0 +1,47 @@
+import path from 'path';
+
+// See: https://platform.openai.com/docs/assistants/tools/file-search#supported-files
+const supportedFormats = {
+  '.c': 'text/x-c',
+  '.cpp': 'text/x-c++',
+  '.cs': 'text/x-csharp',
+  '.css': 'text/css',
+  '.doc': 'application/msword',
+  '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+  '.go': 'text/x-golang',
+  '.html': 'text/html',
+  '.java': 'text/x-java',
+  '.js': 'text/javascript',
+  '.json': 'application/json',
+  '.md': 'text/markdown',
+  '.pdf': 'application/pdf',
+  '.php': 'text/x-php',
+  '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+  '.py': ['text/x-python', 'text/x-script.python'],
+  '.rb': 'text/x-ruby',
+  '.sh': 'application/x-sh',
+  '.tex': 'text/x-tex',
+  '.ts': 'application/typescript',
+  '.txt': 'text/plain',
+} as const;
+
+type SupportedExtension = keyof typeof supportedFormats;
+
+export const isVectorStoreCompatible = (file: Express.Multer.File): boolean => {
+  // Get extension
+  const extension = path.extname(file.originalname).toLowerCase();
+
+  // Check if the file extension is supported
+  if (!(extension in supportedFormats)) {
+    return false;
+  }
+
+  // Get Mime Type (At this point, file extension is confirmed to be supported, so type-safe access is possible)
+  const supportedMimeType = supportedFormats[extension as SupportedExtension];
+
+  // Check if the mimeType is supported
+  const mimeType = file.mimetype;
+  return Array.isArray(supportedMimeType)
+    ? supportedMimeType.includes(mimeType)
+    : supportedMimeType === mimeType;
+};

+ 6 - 0
apps/app/src/server/routes/apiv3/attachment.js

@@ -4,6 +4,7 @@ import express from 'express';
 import multer from 'multer';
 import autoReap from 'multer-autoreap';
 
+import { isVectorStoreCompatible } from '~/features/openai/server/utils/is-vector-store-compatible';
 import { SupportedAction } from '~/interfaces/activity';
 import { AttachmentType } from '~/server/interfaces/attachment';
 import { accessTokenParser } from '~/server/middlewares/access-token-parser';
@@ -368,6 +369,11 @@ module.exports = (crowi) => {
           attachment: attachment.toObject({ virtuals: true }),
         };
 
+        if (isVectorStoreCompatible(file)) {
+          // TODO: https://redmine.weseek.co.jp/issues/165326
+          // Process for uploading to VectorStore
+        }
+
         activityEvent.emit('update', res.locals.activity._id, { action: SupportedAction.ACTION_ATTACHMENT_ADD });
 
         res.apiv3(result);

+ 12 - 8
pnpm-lock.yaml

@@ -301,6 +301,9 @@ importers:
       '@types/async':
         specifier: ^3.2.24
         version: 3.2.24
+      '@types/multer':
+        specifier: ^1.4.12
+        version: 1.4.12
       JSONStream:
         specifier: ^1.3.5
         version: 1.3.5
@@ -11091,6 +11094,7 @@ packages:
   node-domexception@1.0.0:
     resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
     engines: {node: '>=10.5.0'}
+    deprecated: Use your platform's native DOMException instead
 
   node-fetch-h2@2.3.0:
     resolution: {integrity: sha512-ofRW94Ab0T4AOh5Fk8t0h8OBWrmjb0SSB20xh1H8YnPV9EJ+f5AMoYSUQ2zgJ4Iq2HAK0I2l5/Nequ8YzFS3Hg==}
@@ -14791,7 +14795,7 @@ snapshots:
       '@aws-sdk/client-sso-oidc': 3.600.0
       '@aws-sdk/client-sts': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)
       '@aws-sdk/core': 3.598.0
-      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)
+      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))
       '@aws-sdk/middleware-host-header': 3.598.0
       '@aws-sdk/middleware-logger': 3.598.0
       '@aws-sdk/middleware-recursion-detection': 3.598.0
@@ -14899,7 +14903,7 @@ snapshots:
       '@aws-crypto/sha256-js': 5.2.0
       '@aws-sdk/client-sts': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)
       '@aws-sdk/core': 3.598.0
-      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)
+      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))
       '@aws-sdk/middleware-host-header': 3.598.0
       '@aws-sdk/middleware-logger': 3.598.0
       '@aws-sdk/middleware-recursion-detection': 3.598.0
@@ -15075,7 +15079,7 @@ snapshots:
       '@aws-crypto/sha256-js': 5.2.0
       '@aws-sdk/client-sso-oidc': 3.600.0
       '@aws-sdk/core': 3.598.0
-      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)
+      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))
       '@aws-sdk/middleware-host-header': 3.598.0
       '@aws-sdk/middleware-logger': 3.598.0
       '@aws-sdk/middleware-recursion-detection': 3.598.0
@@ -15186,7 +15190,7 @@ snapshots:
     transitivePeerDependencies:
       - aws-crt
 
-  '@aws-sdk/credential-provider-ini@3.598.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)':
+  '@aws-sdk/credential-provider-ini@3.598.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))':
     dependencies:
       '@aws-sdk/client-sts': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)
       '@aws-sdk/credential-provider-env': 3.598.0
@@ -15221,11 +15225,11 @@ snapshots:
     transitivePeerDependencies:
       - aws-crt
 
-  '@aws-sdk/credential-provider-node@3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)':
+  '@aws-sdk/credential-provider-node@3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))':
     dependencies:
       '@aws-sdk/credential-provider-env': 3.598.0
       '@aws-sdk/credential-provider-http': 3.598.0
-      '@aws-sdk/credential-provider-ini': 3.598.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)
+      '@aws-sdk/credential-provider-ini': 3.598.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))
       '@aws-sdk/credential-provider-process': 3.598.0
       '@aws-sdk/credential-provider-sso': 3.598.0(@aws-sdk/client-sso-oidc@3.600.0)
       '@aws-sdk/credential-provider-web-identity': 3.598.0(@aws-sdk/client-sts@3.600.0)
@@ -15308,8 +15312,8 @@ snapshots:
       '@aws-sdk/credential-provider-cognito-identity': 3.600.0
       '@aws-sdk/credential-provider-env': 3.598.0
       '@aws-sdk/credential-provider-http': 3.598.0
-      '@aws-sdk/credential-provider-ini': 3.598.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)
-      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0)
+      '@aws-sdk/credential-provider-ini': 3.598.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))
+      '@aws-sdk/credential-provider-node': 3.600.0(@aws-sdk/client-sso-oidc@3.600.0)(@aws-sdk/client-sts@3.600.0(@aws-sdk/client-sso-oidc@3.600.0))
       '@aws-sdk/credential-provider-process': 3.598.0
       '@aws-sdk/credential-provider-sso': 3.598.0(@aws-sdk/client-sso-oidc@3.600.0)
       '@aws-sdk/credential-provider-web-identity': 3.598.0(@aws-sdk/client-sts@3.600.0)