Browse Source

Merge pull request #10833 from growilabs/fix/178290-bulk-export-aws-minimal-2

fix: Bulk export fails due to S3 upload complete version
mergify[bot] 1 week ago
parent
commit
0d6fefc174

+ 1 - 0
apps/app/package.json

@@ -61,6 +61,7 @@
   "dependencies": {
     "@akebifiky/remark-simple-plantuml": "^1.0.2",
     "@aws-sdk/client-s3": "^3.1014.0",
+    "@aws-sdk/lib-storage": "^3.1014.0",
     "@aws-sdk/s3-request-presigner": "^3.1014.0",
     "@azure/identity": "^4.4.1",
     "@azure/openai": "^2.0.0",

+ 9 - 8
apps/app/src/features/page-bulk-export/server/service/page-bulk-export-job-cron/steps/compress-and-upload.ts

@@ -53,7 +53,12 @@ async function postProcess(
 }
 
 /**
- * Execute a pipeline that reads the page files from the temporal fs directory, compresses them, and uploads to the cloud storage
+ * Compress page files into a tar.gz archive and upload to cloud storage.
+ *
+ * Wraps archiver output with PassThrough to provide a Node.js native Readable,
+ * since archiver uses npm's readable-stream which fails AWS SDK's instanceof check.
+ * The Content-Length / Transfer-Encoding issue is resolved by aws/index.ts using
+ * the Upload class from @aws-sdk/lib-storage.
  */
 export async function compressAndUpload(
   this: IPageBulkExportJobCronService,
@@ -78,12 +83,11 @@ export async function compressAndUpload(
 
   // Wrap with Node.js native PassThrough so that AWS SDK recognizes the stream as a native Readable
   const uploadStream = new PassThrough();
-
-  // Establish pipe before finalize to ensure data flows correctly
   pageArchiver.pipe(uploadStream);
+
   pageArchiver.on('error', (err) => {
+    logger.error('pageArchiver error', err);
     uploadStream.destroy(err);
-    pageArchiver.destroy();
   });
 
   pageArchiver.directory(this.getTmpOutputDir(pageBulkExportJob), false);
@@ -100,9 +104,6 @@ export async function compressAndUpload(
     );
   } catch (e) {
     logger.error(e);
-    this.handleError(e, pageBulkExportJob);
-  } finally {
-    pageArchiver.destroy();
-    uploadStream.destroy();
+    await this.handleError(e, pageBulkExportJob);
   }
 }

+ 31 - 20
apps/app/src/server/service/file-uploader/aws/index.ts

@@ -13,6 +13,7 @@ import {
   PutObjectCommand,
   S3Client,
 } from '@aws-sdk/client-s3';
+import { Upload } from '@aws-sdk/lib-storage';
 import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
 import type { NonBlankString } from '@growi/core/dist/interfaces';
 import { toNonBlankStringOrUndefined } from '@growi/core/dist/interfaces';
@@ -252,30 +253,40 @@ class AwsFileUploader extends AbstractFileUploader {
     const filePath = getFilePathOnStorage(attachment);
     const contentHeaders = createContentHeaders(attachment);
 
-    try {
-      const uploadTimeout = configManager.getConfig('app:fileUploadTimeout');
+    const uploadTimeout = configManager.getConfig('app:fileUploadTimeout');
 
-      await s3.send(
-        new PutObjectCommand({
-          Bucket: getS3Bucket(),
-          Key: filePath,
-          Body: readable,
-          ACL: getS3PutObjectCannedAcl(),
-          // put type and the file name for reference information when uploading
-          ContentType: getContentHeaderValue(contentHeaders, 'Content-Type'),
-          ContentDisposition: getContentHeaderValue(
-            contentHeaders,
-            'Content-Disposition',
-          ),
-        }),
-        { abortSignal: AbortSignal.timeout(uploadTimeout) },
-      );
+    // Use @aws-sdk/lib-storage Upload to handle streaming uploads:
+    // - Resolves archiver's readable-stream (npm) failing AWS SDK's instanceof Readable check
+    // - Avoids Transfer-Encoding: chunked which S3 rejects with 501 (PutObjectCommand issue)
+    // - Under 5MB: falls back to PutObjectCommand internally
+    // - Over 5MB: uses multipart upload (requires s3:AbortMultipartUpload permission)
+    const upload = new Upload({
+      client: s3,
+      params: {
+        Bucket: getS3Bucket(),
+        Key: filePath,
+        Body: readable,
+        ACL: getS3PutObjectCannedAcl(),
+        ContentType: getContentHeaderValue(contentHeaders, 'Content-Type'),
+        ContentDisposition: getContentHeaderValue(
+          contentHeaders,
+          'Content-Disposition',
+        ),
+      },
+    });
+
+    const timeoutId = setTimeout(() => {
+      logger.warn(`Upload timeout: fileName=${attachment.fileName}`);
+      upload.abort();
+    }, uploadTimeout);
+
+    try {
+      await upload.done();
 
       logger.debug(
         `File upload completed successfully: fileName=${attachment.fileName}`,
       );
     } catch (error) {
-      // Handle timeout error specifically
       if (error.name === 'AbortError') {
         logger.warn(`Upload timeout: fileName=${attachment.fileName}`, error);
       } else {
@@ -284,9 +295,9 @@ class AwsFileUploader extends AbstractFileUploader {
           error,
         );
       }
-      // Re-throw the error to be handled by the caller.
-      // The pipeline automatically handles stream cleanup on error.
       throw error;
+    } finally {
+      clearTimeout(timeoutId);
     }
   }
 

File diff suppressed because it is too large
+ 205 - 179
pnpm-lock.yaml


Some files were not shown because too many files changed in this diff