فهرست منبع

Merge pull request #6962 from mizozobu/feat/resume-file-upload

feat: resume file upload
Haku Mizuki 3 سال پیش
والد
کامیت
c09e617a7e

+ 2 - 2
packages/app/src/server/models/vo/g2g-transfer-error.ts

@@ -2,8 +2,8 @@ import ExtensibleCustomError from 'extensible-custom-error';
 
 
 export const G2GTransferErrorCode = {
 export const G2GTransferErrorCode = {
   INVALID_TRANSFER_KEY_STRING: 'INVALID_TRANSFER_KEY_STRING',
   INVALID_TRANSFER_KEY_STRING: 'INVALID_TRANSFER_KEY_STRING',
-  FAILED_TO_RETREIVE_GROWI_INFO: 'FAILED_TO_RETREIVE_GROWI_INFO',
-  FAILED_TO_RETREIVE_ATTACHMENTS: 'FAILED_TO_RETREIVE_ATTACHMENTS',
+  // attached to the G2GTransferError raised with the message 'Failed to retrieve growi info.'
+  FAILED_TO_RETRIEVE_GROWI_INFO: 'FAILED_TO_RETRIEVE_GROWI_INFO',
+  // attached to the G2GTransferError raised with the message 'Failed to retrieve file metadata'
+  FAILED_TO_RETRIEVE_FILE_METADATA: 'FAILED_TO_RETRIEVE_FILE_METADATA',
 } as const;
 } as const;
 
 
 export type G2GTransferErrorCode = typeof G2GTransferErrorCode[keyof typeof G2GTransferErrorCode];
 export type G2GTransferErrorCode = typeof G2GTransferErrorCode[keyof typeof G2GTransferErrorCode];

+ 4 - 14
packages/app/src/server/routes/apiv3/g2g-transfer.ts

@@ -4,7 +4,6 @@ import path from 'path';
 import { ErrorV3 } from '@growi/core';
 import { ErrorV3 } from '@growi/core';
 import express, { NextFunction, Request, Router } from 'express';
 import express, { NextFunction, Request, Router } from 'express';
 import { body } from 'express-validator';
 import { body } from 'express-validator';
-import { type Document } from 'mongoose';
 import multer from 'multer';
 import multer from 'multer';
 
 
 import { SupportedAction } from '~/interfaces/activity';
 import { SupportedAction } from '~/interfaces/activity';
@@ -148,15 +147,9 @@ module.exports = (crowi: Crowi): Router => {
   const pushRouter = express.Router();
   const pushRouter = express.Router();
 
 
   // eslint-disable-next-line max-len
   // eslint-disable-next-line max-len
-  receiveRouter.get('/attachments', verifyAndExtractTransferKey, async(req: Request & { transferKey: TransferKey, operatorUserId: string }, res: ApiV3Response) => {
-    const transform = (doc: Document) => JSON.stringify(doc._id.toString());
-    const readStream = crowi.exportService.createExportCollectionStream(
-      'attachments',
-      undefined,
-      { projection: { _id: 1 } },
-      transform,
-    );
-    return readStream.pipe(res);
+  receiveRouter.get('/files', verifyAndExtractTransferKey, async(req: Request & { transferKey: TransferKey, operatorUserId: string }, res: ApiV3Response) => {
+    // Ask crowi.fileUploadService for the list of files it reports
+    const files = await crowi.fileUploadService.listFiles();
+    // Send the list back under the `files` key of the apiv3 response
+    return res.apiv3({ files });
   });
   });
 
 
   // Auto import
   // Auto import
@@ -398,12 +391,9 @@ module.exports = (crowi: Crowi): Router => {
       return res.apiv3Err(new ErrorV3(transferability.reason, 'growi_incompatible_to_transfer'));
       return res.apiv3Err(new ErrorV3(transferability.reason, 'growi_incompatible_to_transfer'));
     }
     }
 
 
-    // get attachments from new growi
-    const attachmentIdsFromNewGrowi = await g2gTransferPusherService.getAttachments(tk);
-
     // Start transfer
     // Start transfer
     try {
     try {
-      await g2gTransferPusherService.startTransfer(tk, req.user, toGROWIInfo, collections, optionsMap, attachmentIdsFromNewGrowi);
+      await g2gTransferPusherService.startTransfer(tk, req.user, toGROWIInfo, collections, optionsMap);
     }
     }
     catch (err) {
     catch (err) {
       logger.error(err);
       logger.error(err);

+ 0 - 20
packages/app/src/server/service/export.js

@@ -164,26 +164,6 @@ class ExportService {
     return transformStream;
     return transformStream;
   }
   }
 
 
-  /**
-   * dump a mongodb collection into json
-   *
-   * @memberOf ExportService
-   * @param {string} collectionName collection name
-   * @param {Filter<TSchema>} filter find filter
-   * @param {FindOptions} options find options
-   * @param {CursorStreamOptions.transform} transform a transformation method applied to each document emitted by the stream
-   * @return {NodeJS.ReadStream} readstream for the collection
-   */
-  createExportCollectionStream(collectionName, filter, options, transform = JSON.stringify) {
-    const collection = mongoose.connection.collection(collectionName);
-    const nativeCursor = collection.find(filter, options);
-    const readStream = nativeCursor.stream({ transform });
-    const transformStream = this.generateTransformStream();
-
-    return readStream
-      .pipe(transformStream);
-  }
-
   /**
   /**
    * dump a mongodb collection into json
    * dump a mongodb collection into json
    *
    *

+ 53 - 0
packages/app/src/server/service/file-uploader/aws.ts

@@ -6,6 +6,7 @@ import {
   PutObjectCommand,
   PutObjectCommand,
   DeleteObjectCommand,
   DeleteObjectCommand,
   GetObjectCommandOutput,
   GetObjectCommandOutput,
+  ListObjectsCommand,
 } from '@aws-sdk/client-s3';
 } from '@aws-sdk/client-s3';
 import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
 import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
 import urljoin from 'url-join';
 import urljoin from 'url-join';
@@ -15,6 +16,15 @@ import loggerFactory from '~/utils/logger';
 
 
 const logger = loggerFactory('growi:service:fileUploaderAws');
 const logger = loggerFactory('growi:service:fileUploaderAws');
 
 
+/**
+ * File metadata in storage
+ * (the element type of the array returned by `lib.listFiles` in this file)
+ * TODO: mv this to "./uploader"
+ */
+  interface FileMeta {
+  // name of the file; `lib.listFiles` fills it with the S3 object `Key`
+  name: string;
+  // size of the file; `lib.listFiles` fills it with the S3 object `Size`
+  size: number;
+}
+
 type AwsCredential = {
 type AwsCredential = {
   accessKeyId: string,
   accessKeyId: string,
   secretAccessKey: string
   secretAccessKey: string
@@ -230,5 +240,48 @@ module.exports = (crowi) => {
     return lib.doCheckLimit(uploadFileSize, maxFileSize, totalLimit);
     return lib.doCheckLimit(uploadFileSize, maxFileSize, totalLimit);
   };
   };
 
 
+  /**
+   * List files in storage
+   *
+   * Pages through the configured bucket with ListObjects until the response is
+   * no longer truncated and collects the key and size of every object.
+   *
+   * @returns name (object key) and size of each object in the bucket
+   * @throws {Error} when `lib.getIsReadable()` is falsy
+   */
+  lib.listFiles = async(): Promise<FileMeta[]> => {
+    if (!lib.getIsReadable()) {
+      throw new Error('AWS is not configured.');
+    }
+
+    const files: FileMeta[] = [];
+    const s3 = S3Factory();
+    const awsConfig = getAwsConfig();
+    const params = {
+      Bucket: awsConfig.bucket,
+    };
+    let shouldContinue = true;
+    let nextMarker: string | undefined;
+
+    // handle pagination
+    while (shouldContinue) {
+      // eslint-disable-next-line no-await-in-loop
+      const { Contents = [], IsTruncated, NextMarker } = await s3.send(new ListObjectsCommand({
+        ...params,
+        Marker: nextMarker,
+      }));
+      files.push(...(
+        Contents.map(({ Key, Size }) => ({
+          name: Key as string,
+          size: Size as number,
+        }))
+      ));
+
+      // S3 returns NextMarker only when a Delimiter is specified.
+      // Without it, the last key of the current page is the marker for the next page.
+      const marker = NextMarker ?? Contents[Contents.length - 1]?.Key;
+
+      if (!IsTruncated || marker == null) {
+        // stop as well when no marker can be derived; otherwise the first page would be requested forever
+        shouldContinue = false;
+        nextMarker = undefined;
+      }
+      else {
+        nextMarker = marker;
+      }
+    }
+
+    return files;
+  };
+
   return lib;
   return lib;
 };
 };

+ 17 - 0
packages/app/src/server/service/file-uploader/gcs.js

@@ -184,5 +184,22 @@ module.exports = function(crowi) {
     return lib.doCheckLimit(uploadFileSize, maxFileSize, gcsTotalLimit);
     return lib.doCheckLimit(uploadFileSize, maxFileSize, gcsTotalLimit);
   };
   };
 
 
+  /**
+   * List files in storage
+   *
+   * @returns name and size (as a number) of every file returned by `bucket.getFiles()`
+   * @throws {Error} when `lib.getIsReadable()` is falsy
+   */
+  lib.listFiles = async() => {
+    // an arrow function has no `this` of its own, so the readability check must go through `lib`
+    if (!lib.getIsReadable()) {
+      throw new Error('GCS is not configured.');
+    }
+
+    const gcs = getGcsInstance();
+    const bucket = gcs.bucket(getGcsBucket());
+    const [files] = await bucket.getFiles();
+
+    return files.map(({ name, metadata: { size } }) => {
+      // the size in GCS object metadata may be a string; normalize it so it compares equal to numeric sizes
+      return { name, size: Number(size) };
+    });
+  };
+
   return lib;
   return lib;
 };
 };

+ 10 - 0
packages/app/src/server/service/file-uploader/gridfs.js

@@ -125,5 +125,15 @@ module.exports = function(crowi) {
     return AttachmentFile.read({ _id: attachmentFile._id });
     return AttachmentFile.read({ _id: attachmentFile._id });
   };
   };
 
 
+  /**
+   * List files in storage
+   *
+   * Reads every AttachmentFile document and reports its `filename` as `name`
+   * and its `length` as `size`.
+   */
+  lib.listFiles = async() => {
+    const storedFiles = await AttachmentFile.find();
+    return storedFiles.map((storedFile) => {
+      const { filename, length } = storedFile;
+      return { name: filename, size: length };
+    });
+  };
+
   return lib;
   return lib;
 };
 };

+ 29 - 0
packages/app/src/server/service/file-uploader/local.js

@@ -3,7 +3,9 @@ import loggerFactory from '~/utils/logger';
 const logger = loggerFactory('growi:service:fileUploaderLocal');
 const logger = loggerFactory('growi:service:fileUploaderLocal');
 
 
 const fs = require('fs');
 const fs = require('fs');
+const fsPromises = require('fs/promises');
 const path = require('path');
 const path = require('path');
+
 const mkdir = require('mkdirp');
 const mkdir = require('mkdirp');
 const streamToPromise = require('stream-to-promise');
 const streamToPromise = require('stream-to-promise');
 const urljoin = require('url-join');
 const urljoin = require('url-join');
@@ -28,6 +30,16 @@ module.exports = function(crowi) {
     return filePath;
     return filePath;
   }
   }
 
 
+  /**
+   * Collect the resolved paths of all non-directory entries under `dirPath`,
+   * descending into every sub directory.
+   *
+   * @param {string} dirPath directory to walk
+   * @return {Promise<string[]>} flat list of paths
+   */
+  async function readdirRecursively(dirPath) {
+    const entries = await fsPromises.readdir(dirPath, { withFileTypes: true });
+    const pending = entries.map((entry) => {
+      const entryPath = path.resolve(dirPath, entry.name);
+      if (entry.isDirectory()) {
+        // a sub directory contributes the (already flat) list of paths below it
+        return readdirRecursively(entryPath);
+      }
+      return entryPath;
+    });
+    const nested = await Promise.all(pending);
+
+    return nested.flat();
+  }
+
   lib.isValidUploadSettings = function() {
   lib.isValidUploadSettings = function() {
     return true;
     return true;
   };
   };
@@ -126,5 +138,22 @@ module.exports = function(crowi) {
     return res.end();
     return res.end();
   };
   };
 
 
+  /**
+   * List files in storage
+   *
+   * Walks `basePath` recursively and reports each file with its path relative
+   * to `basePath` as `name` and its stat size as `size`.
+   */
+  lib.listFiles = async() => {
+    // `mkdir -p` to avoid ENOENT error
+    await mkdir(basePath);
+    const filePaths = await readdirRecursively(basePath);
+
+    const toFileMeta = async(filePath) => {
+      const { size } = await fsPromises.stat(filePath);
+      return {
+        name: path.relative(basePath, filePath),
+        size,
+      };
+    };
+
+    return Promise.all(filePaths.map(filePath => toFileMeta(filePath)));
+  };
+
   return lib;
   return lib;
 };
 };

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 0 - 0
packages/app/src/server/service/file-uploader/none.js


+ 73 - 17
packages/app/src/server/service/g2g-transfer.ts

@@ -1,5 +1,6 @@
 import { randomUUID } from 'crypto';
 import { randomUUID } from 'crypto';
 import { createReadStream, ReadStream } from 'fs';
 import { createReadStream, ReadStream } from 'fs';
+import { basename } from 'path';
 import { Readable } from 'stream';
 import { Readable } from 'stream';
 
 
 // eslint-disable-next-line no-restricted-imports
 // eslint-disable-next-line no-restricted-imports
@@ -49,10 +50,13 @@ export type IDataGROWIInfo = {
 }
 }
 
 
 /**
 /**
- * Attachment data already exsisting in the new GROWI
+ * File metadata in storage
+ * TODO: mv this to "./file-uploader/uploader"
  */
  */
-// TODO: use Attachemnt model type
-export type Attachment = any;
+interface FileMeta {
+  // file name reported for the storage; transferAttachments compares its basename with `fileName`
+  name: string;
+  // file size reported for the storage; transferAttachments compares it with `fileSize`
+  size: number;
+}
 
 
 /**
 /**
  * Return type for {@link Pusher.getTransferability}
  * Return type for {@link Pusher.getTransferability}
@@ -70,11 +74,16 @@ interface Pusher {
    * @param {IDataGROWIInfo} fromGROWIInfo
    * @param {IDataGROWIInfo} fromGROWIInfo
    */
    */
   getTransferability(fromGROWIInfo: IDataGROWIInfo): Promise<IGetTransferabilityReturn>
   getTransferability(fromGROWIInfo: IDataGROWIInfo): Promise<IGetTransferabilityReturn>
+  /**
+   * List files in the storage
+   * @param {TransferKey} tk Transfer key
+   */
+  listFilesInStorage(tk: TransferKey): Promise<FileMeta[]>
   /**
   /**
    * Transfer all Attachment data to destination GROWI
    * Transfer all Attachment data to destination GROWI
    * @param {TransferKey} tk Transfer key
    * @param {TransferKey} tk Transfer key
    */
    */
-  transferAttachments(tk: TransferKey, attachmentIdsFromNewGrowi: string[]): Promise<void>
+  transferAttachments(tk: TransferKey): Promise<void>
   /**
   /**
    * Start transfer data between GROWIs
    * Start transfer data between GROWIs
    * @param {TransferKey} tk TransferKey object
    * @param {TransferKey} tk TransferKey object
@@ -87,7 +96,6 @@ interface Pusher {
     toGROWIInfo: IDataGROWIInfo,
     toGROWIInfo: IDataGROWIInfo,
     collections: string[],
     collections: string[],
     optionsMap: any,
     optionsMap: any,
-    attachmentIdsFromNewGrowi: string[]
   ): Promise<void>
   ): Promise<void>
 }
 }
 
 
@@ -225,7 +233,7 @@ export class G2GTransferPusherService implements Pusher {
     }
     }
     catch (err) {
     catch (err) {
       logger.error(err);
       logger.error(err);
-      throw new G2GTransferError('Failed to retreive growi info.', G2GTransferErrorCode.FAILED_TO_RETREIVE_GROWI_INFO);
+      throw new G2GTransferError('Failed to retrieve growi info.', G2GTransferErrorCode.FAILED_TO_RETRIEVE_GROWI_INFO);
     }
     }
 
 
     return toGROWIInfo;
     return toGROWIInfo;
@@ -278,25 +286,73 @@ export class G2GTransferPusherService implements Pusher {
     return { canTransfer: true };
     return { canTransfer: true };
   }
   }
 
 
-  public async getAttachments(tk: TransferKey): Promise<string[]> {
+  /**
+   * Request `/_api/v3/g2g-transfer/files` with the request config built from the transfer key
+   * and return the `files` array found in the response body.
+   * @param {TransferKey} tk Transfer key
+   * @throws {G2GTransferError} with code FAILED_TO_RETRIEVE_FILE_METADATA when the request throws (the cause is logged)
+   */
+  public async listFilesInStorage(tk: TransferKey): Promise<FileMeta[]> {
     try {
     try {
-      const { data } = await axios.get<string[]>('/_api/v3/g2g-transfer/attachments', generateAxiosRequestConfigWithTransferKey(tk));
-      return data;
+      const { data: { files } } = await axios.get<{ files: FileMeta[] }>('/_api/v3/g2g-transfer/files', generateAxiosRequestConfigWithTransferKey(tk));
+      return files;
     }
     }
     catch (err) {
     catch (err) {
       logger.error(err);
       logger.error(err);
-      throw new G2GTransferError('Failed to retreive attachments', G2GTransferErrorCode.FAILED_TO_RETREIVE_ATTACHMENTS);
+      throw new G2GTransferError('Failed to retrieve file metadata', G2GTransferErrorCode.FAILED_TO_RETRIEVE_FILE_METADATA);
     }
     }
   }
   }
 
 
-  public async transferAttachments(tk: TransferKey, attachmentIdsFromNewGrowi: string[]): Promise<void> {
+  public async transferAttachments(tk: TransferKey): Promise<void> {
     const BATCH_SIZE = 100;
     const BATCH_SIZE = 100;
-
     const { fileUploadService } = this.crowi;
     const { fileUploadService } = this.crowi;
     const Attachment = this.crowi.model('Attachment');
     const Attachment = this.crowi.model('Attachment');
-
-    // batch get
-    const attachmentsCursor = await Attachment.find({ _id: { $nin: attachmentIdsFromNewGrowi } }).cursor();
+    const filesFromNewGrowi = await this.listFilesInStorage(tk);
+
+    /**
+     * Given these documents,
+     *
+     * | fileName | fileSize |
+     * | -- | -- |
+     * | a.png | 1024 |
+     * | b.png | 2048 |
+     * | c.png | 1024 |
+     * | d.png | 2048 |
+     *
+     * this filter
+     *
+     * ```jsonc
+     * {
+     *   $and: [
+     *     // a file transferred
+     *     {
+     *       $or: [
+     *         { fileName: { $ne: "a.png" } },
+     *         { fileSize: { $ne: 1024 } }
+     *       ]
+     *     },
+     *     // a file failed to transfer
+     *     {
+     *       $or: [
+     *         { fileName: { $ne: "b.png" } },
+     *         { fileSize: { $ne: 0 } }
+     *       ]
+     *     }
+     *   ]
+     * }
+     * ```
+     *
+     * results in
+     *
+     * | fileName | fileSize |
+     * | -- | -- |
+     * | b.png | 2048 |
+     * | c.png | 1024 |
+     * | d.png | 2048 |
+     */
+    const filter = filesFromNewGrowi.length > 0 ? {
+      $and: filesFromNewGrowi.map(({ name, size }) => ({
+        $or: [
+          { fileName: { $ne: basename(name) } },
+          { fileSize: { $ne: size } },
+        ],
+      })),
+    } : {};
+    const attachmentsCursor = await Attachment.find(filter).cursor();
     const batchStream = createBatchStream(BATCH_SIZE);
     const batchStream = createBatchStream(BATCH_SIZE);
 
 
     for await (const attachmentBatch of attachmentsCursor.pipe(batchStream)) {
     for await (const attachmentBatch of attachmentsCursor.pipe(batchStream)) {
@@ -325,7 +381,7 @@ export class G2GTransferPusherService implements Pusher {
   }
   }
 
 
   // eslint-disable-next-line max-len
   // eslint-disable-next-line max-len
-  public async startTransfer(tk: TransferKey, user: any, toGROWIInfo: IDataGROWIInfo, collections: string[], optionsMap: any, attachmentIdsFromNewGrowi: string[], shouldEmit = true): Promise<void> {
+  public async startTransfer(tk: TransferKey, user: any, toGROWIInfo: IDataGROWIInfo, collections: string[], optionsMap: any, shouldEmit = true): Promise<void> {
     const socket = this.crowi.socketIoService.getAdminSocket();
     const socket = this.crowi.socketIoService.getAdminSocket();
 
 
     if (shouldEmit) {
     if (shouldEmit) {
@@ -390,7 +446,7 @@ export class G2GTransferPusherService implements Pusher {
     }
     }
 
 
     try {
     try {
-      await this.transferAttachments(tk, attachmentIdsFromNewGrowi);
+      await this.transferAttachments(tk);
     }
     }
     catch (err) {
     catch (err) {
       logger.error(err);
       logger.error(err);

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است