|
@@ -1,100 +1,177 @@
|
|
|
-import fs from 'fs';
|
|
|
|
|
-import path from 'path';
|
|
|
|
|
-import { Writable } from 'stream';
|
|
|
|
|
|
|
+import type { Readable } from 'stream';
|
|
|
|
|
+import { Writable, pipeline } from 'stream';
|
|
|
|
|
|
|
|
import { type IPage, isPopulated } from '@growi/core';
|
|
import { type IPage, isPopulated } from '@growi/core';
|
|
|
import { normalizePath } from '@growi/core/dist/utils/path-utils';
|
|
import { normalizePath } from '@growi/core/dist/utils/path-utils';
|
|
|
-import archiver, { Archiver } from 'archiver';
|
|
|
|
|
|
|
+import type { Archiver } from 'archiver';
|
|
|
|
|
+import archiver from 'archiver';
|
|
|
|
|
+import type { QueueObject } from 'async';
|
|
|
|
|
+import gc from 'expose-gc/function';
|
|
|
import mongoose from 'mongoose';
|
|
import mongoose from 'mongoose';
|
|
|
|
|
|
|
|
-import { PageModel, PageDocument } from '~/server/models/page';
|
|
|
|
|
|
|
+import type { PageModel, PageDocument } from '~/server/models/page';
|
|
|
|
|
+import type { IAwsMultipartUploader } from '~/server/service/file-uploader/aws/multipart-upload';
|
|
|
|
|
+import { getBufferToFixedSizeTransform } from '~/server/util/stream';
|
|
|
import loggerFactory from '~/utils/logger';
|
|
import loggerFactory from '~/utils/logger';
|
|
|
|
|
|
|
|
|
|
|
|
|
const logger = loggerFactory('growi:services:PageBulkExportService');
|
|
const logger = loggerFactory('growi:services:PageBulkExportService');
|
|
|
|
|
|
|
|
-const streamToPromise = require('stream-to-promise');
|
|
|
|
|
|
|
+// Custom type for back pressure workaround
|
|
|
|
|
+interface ArchiverWithQueue extends Archiver {
|
|
|
|
|
+ _queue?: QueueObject<any>;
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
class PageBulkExportService {
|
|
class PageBulkExportService {
|
|
|
|
|
|
|
|
crowi: any;
|
|
crowi: any;
|
|
|
|
|
|
|
|
|
|
+ // multipart upload part size
|
|
|
|
|
+ partSize = 5 * 1024 * 1024; // 5MB
|
|
|
|
|
+
|
|
|
|
|
+ pageBatchSize = 100;
|
|
|
|
|
+
|
|
|
constructor(crowi) {
|
|
constructor(crowi) {
|
|
|
this.crowi = crowi;
|
|
this.crowi = crowi;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- getPageReadableStream(basePagePath: string) {
|
|
|
|
|
|
|
+ async bulkExportWithBasePagePath(basePagePath: string): Promise<void> {
|
|
|
|
|
+ const timeStamp = (new Date()).getTime();
|
|
|
|
|
+ const uploadKey = `page-bulk-export-${timeStamp}.zip`;
|
|
|
|
|
+
|
|
|
|
|
+ const pagesReadable = this.getPageReadable(basePagePath);
|
|
|
|
|
+ const zipArchiver = this.setUpZipArchiver();
|
|
|
|
|
+ const pagesWritable = this.getPageWritable(zipArchiver);
|
|
|
|
|
+ const bufferToPartSizeTransform = getBufferToFixedSizeTransform(this.partSize);
|
|
|
|
|
+
|
|
|
|
|
+ // init multipart upload
|
|
|
|
|
+ // TODO: Create abstract interface IMultipartUploader in https://redmine.weseek.co.jp/issues/135775
|
|
|
|
|
+ const multipartUploader: IAwsMultipartUploader | undefined = this.crowi?.fileUploadService?.createMultipartUploader(uploadKey);
|
|
|
|
|
+ try {
|
|
|
|
|
+ if (multipartUploader == null) {
|
|
|
|
|
+ throw Error('Multipart upload not available for configured file upload type');
|
|
|
|
|
+ }
|
|
|
|
|
+ await multipartUploader.initUpload();
|
|
|
|
|
+ }
|
|
|
|
|
+ catch (err) {
|
|
|
|
|
+ await this.handleExportError(err, multipartUploader);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ const multipartUploadWritable = this.getMultipartUploadWritable(multipartUploader);
|
|
|
|
|
+
|
|
|
|
|
+ // Cannot directly pipe from pagesWritable to zipArchiver due to how the 'append' method works.
|
|
|
|
|
+ // Hence, execution of two pipelines is required.
|
|
|
|
|
+ pipeline(pagesReadable, pagesWritable, err => this.handleExportError(err, multipartUploader));
|
|
|
|
|
+ pipeline(zipArchiver, bufferToPartSizeTransform, multipartUploadWritable, err => this.handleExportError(err, multipartUploader));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ async handleExportError(err: Error | null, multipartUploader: IAwsMultipartUploader | undefined): Promise<void> {
|
|
|
|
|
+ if (err != null) {
|
|
|
|
|
+ logger.error(err);
|
|
|
|
|
+ if (multipartUploader != null) {
|
|
|
|
|
+ await multipartUploader.abortUpload();
|
|
|
|
|
+ }
|
|
|
|
|
+ // TODO: notify failure to client: https://redmine.weseek.co.jp/issues/78037
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * Get a Readable of all the pages under the specified path, including the root page.
|
|
|
|
|
+ */
|
|
|
|
|
+ private getPageReadable(basePagePath: string): Readable {
|
|
|
const Page = mongoose.model<IPage, PageModel>('Page');
|
|
const Page = mongoose.model<IPage, PageModel>('Page');
|
|
|
const { PageQueryBuilder } = Page;
|
|
const { PageQueryBuilder } = Page;
|
|
|
|
|
|
|
|
const builder = new PageQueryBuilder(Page.find())
|
|
const builder = new PageQueryBuilder(Page.find())
|
|
|
- .addConditionToListOnlyDescendants(basePagePath);
|
|
|
|
|
|
|
+ .addConditionToListWithDescendants(basePagePath);
|
|
|
|
|
|
|
|
return builder
|
|
return builder
|
|
|
.query
|
|
.query
|
|
|
.populate('revision')
|
|
.populate('revision')
|
|
|
.lean()
|
|
.lean()
|
|
|
- .cursor({ batchSize: 100 }); // convert to stream
|
|
|
|
|
|
|
+ .cursor({ batchSize: this.pageBatchSize });
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- setUpZipArchiver(): Archiver {
|
|
|
|
|
- const timeStamp = (new Date()).getTime();
|
|
|
|
|
- const zipFilePath = path.join(__dirname, `${timeStamp}.md.zip`);
|
|
|
|
|
|
|
+ /**
|
|
|
|
|
+ * Get a Writable that writes the page body to a zip file
|
|
|
|
|
+ */
|
|
|
|
|
+ private getPageWritable(zipArchiver: Archiver): Writable {
|
|
|
|
|
+ return new Writable({
|
|
|
|
|
+ objectMode: true,
|
|
|
|
|
+ write: async(page: PageDocument, encoding, callback) => {
|
|
|
|
|
+ try {
|
|
|
|
|
+ const revision = page.revision;
|
|
|
|
|
+
|
|
|
|
|
+ if (revision != null && isPopulated(revision)) {
|
|
|
|
|
+ const markdownBody = revision.body;
|
|
|
|
|
+ const pathNormalized = normalizePath(page.path);
|
|
|
|
|
+ // Since archiver does not provide a proper way to back pressure at the moment, use the _queue property as a workaround
|
|
|
|
|
+ // ref: https://github.com/archiverjs/node-archiver/issues/611
|
|
|
|
|
+ const { _queue } = zipArchiver.append(markdownBody, { name: `${pathNormalized}.md` }) as ArchiverWithQueue;
|
|
|
|
|
+ if (_queue == null) {
|
|
|
|
|
+ throw Error('Cannot back pressure the export pipeline. Aborting the export.');
|
|
|
|
|
+ }
|
|
|
|
|
+ if (_queue.length() > this.pageBatchSize) {
|
|
|
|
|
+ await _queue.drain();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ catch (err) {
|
|
|
|
|
+ callback(err);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ callback();
|
|
|
|
|
+ },
|
|
|
|
|
+ final: (callback) => {
|
|
|
|
|
+ zipArchiver.finalize();
|
|
|
|
|
+ callback();
|
|
|
|
|
+ },
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- const archive = archiver('zip', {
|
|
|
|
|
|
|
+ private setUpZipArchiver(): Archiver {
|
|
|
|
|
+ const zipArchiver = archiver('zip', {
|
|
|
zlib: { level: 9 }, // maximum compression
|
|
zlib: { level: 9 }, // maximum compression
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
// good practice to catch warnings (ie stat failures and other non-blocking errors)
|
|
// good practice to catch warnings (ie stat failures and other non-blocking errors)
|
|
|
- archive.on('warning', (err) => {
|
|
|
|
|
|
|
+ zipArchiver.on('warning', (err) => {
|
|
|
if (err.code === 'ENOENT') logger.error(err);
|
|
if (err.code === 'ENOENT') logger.error(err);
|
|
|
else throw err;
|
|
else throw err;
|
|
|
});
|
|
});
|
|
|
- // good practice to catch this error explicitly
|
|
|
|
|
- archive.on('error', (err) => { throw err });
|
|
|
|
|
-
|
|
|
|
|
- // pipe archive data to the file
|
|
|
|
|
- const output = fs.createWriteStream(zipFilePath);
|
|
|
|
|
- archive.pipe(output);
|
|
|
|
|
|
|
|
|
|
- return archive;
|
|
|
|
|
|
|
+ return zipArchiver;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- async bulkExportWithBasePagePath(basePagePath: string): Promise<void> {
|
|
|
|
|
- // get pages with descendants as stream
|
|
|
|
|
- const pageReadableStream = this.getPageReadableStream(basePagePath);
|
|
|
|
|
-
|
|
|
|
|
- const archive = this.setUpZipArchiver();
|
|
|
|
|
|
|
+ private getMultipartUploadWritable(multipartUploader: IAwsMultipartUploader): Writable {
|
|
|
|
|
+ let partNumber = 1;
|
|
|
|
|
|
|
|
- const pagesWritable = new Writable({
|
|
|
|
|
- objectMode: true,
|
|
|
|
|
- async write(page: PageDocument, encoding, callback) {
|
|
|
|
|
|
|
+ return new Writable({
|
|
|
|
|
+ write: async(part: Buffer, encoding, callback) => {
|
|
|
try {
|
|
try {
|
|
|
- const revision = page.revision;
|
|
|
|
|
-
|
|
|
|
|
- if (revision != null && isPopulated(revision)) {
|
|
|
|
|
- const markdownBody = revision.body;
|
|
|
|
|
- // write to zip
|
|
|
|
|
- const pathNormalized = normalizePath(page.path);
|
|
|
|
|
- archive.append(markdownBody, { name: `${pathNormalized}.md` });
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ await multipartUploader.uploadPart(part, partNumber);
|
|
|
|
|
+ partNumber += 1;
|
|
|
|
|
+ // First aid to prevent unexplained memory leaks
|
|
|
|
|
+ logger.info('global.gc() invoked.');
|
|
|
|
|
+ gc();
|
|
|
}
|
|
}
|
|
|
catch (err) {
|
|
catch (err) {
|
|
|
- logger.error(err);
|
|
|
|
|
- throw Error('Failed to export page tree');
|
|
|
|
|
|
|
+ callback(err);
|
|
|
|
|
+ return;
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
callback();
|
|
callback();
|
|
|
},
|
|
},
|
|
|
- final(callback) {
|
|
|
|
|
- archive.finalize();
|
|
|
|
|
|
|
+ async final(callback) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ await multipartUploader.completeUpload();
|
|
|
|
|
+ }
|
|
|
|
|
+ catch (err) {
|
|
|
|
|
+ callback(err);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
callback();
|
|
callback();
|
|
|
},
|
|
},
|
|
|
});
|
|
});
|
|
|
-
|
|
|
|
|
- pageReadableStream.pipe(pagesWritable);
|
|
|
|
|
-
|
|
|
|
|
- await streamToPromise(archive);
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
}
|
|
}
|