Преглед изворни кода

bulk export page to app fs

Futa Arai пре 2 година
родитељ
комит
8b69a4f7bb

+ 3 - 2
apps/app/package.json

@@ -68,8 +68,8 @@
     "@elastic/elasticsearch8": "npm:@elastic/elasticsearch@^8.7.0",
     "@godaddy/terminus": "^4.9.0",
     "@google-cloud/storage": "^5.8.5",
-    "@growi/custom-icons": "link:../../packages/custom-icons",
     "@growi/core": "link:../../packages/core",
+    "@growi/custom-icons": "link:../../packages/custom-icons",
     "@growi/pluginkit": "link:../../packages/pluginkit",
     "@growi/preset-templates": "link:../../packages/preset-templates",
     "@growi/preset-themes": "link:../../packages/preset-themes",
@@ -226,12 +226,13 @@
     "@next/bundle-analyzer": "^13.2.3",
     "@swc-node/jest": "^1.6.2",
     "@swc/jest": "^0.2.24",
+    "@types/archiver": "^6.0.2",
     "@types/express": "^4.17.11",
     "@types/jest": "^29.5.2",
     "@types/react-scroll": "^1.8.4",
     "@types/throttle-debounce": "^5.0.1",
-    "@types/url-join": "^4.0.2",
     "@types/unzip-stream": "^0.3.4",
+    "@types/url-join": "^4.0.2",
     "@vitest/coverage-v8": "^0.34.6",
     "autoprefixer": "^9.0.0",
     "babel-loader": "^8.2.5",

+ 49 - 0
apps/app/src/features/page-bulk-export/server/routes/apiv3/page-bulk-export.ts

@@ -0,0 +1,49 @@
+import { ErrorV3 } from '@growi/core/dist/models';
+import { Router, Request } from 'express';
+import { body, validationResult } from 'express-validator';
+
+import Crowi from '~/server/crowi';
+import { ApiV3Response } from '~/server/routes/apiv3/interfaces/apiv3-response';
+import loggerFactory from '~/utils/logger';
+
+const logger = loggerFactory('growi:routes:apiv3:page-bulk-export');
+
+const router = Router();
+
+/**
+ * Express request with an optional `user` property
+ * (presumably populated by the login middleware -- confirm against that middleware).
+ */
+interface AuthorizedRequest extends Request {
+  user?: any
+}
+
+/**
+ * Build the apiv3 router for page bulk export.
+ *
+ * Registers `POST /`, which expects `path` and `format` as non-empty strings
+ * in the request body and triggers a bulk export of the pages under `path`.
+ *
+ * @param crowi application instance providing `exportService`
+ * @returns the router with the bulk export route registered
+ */
+module.exports = (crowi: Crowi): Router => {
+  const loginRequiredStrictly = require('~/server/middlewares/login-required')(crowi);
+
+  // The route has no URL parameters ('/'), so the inputs must be validated on the request body.
+  const validators = {
+    pageBulkExport: [
+      body('path').exists({ checkFalsy: true }).isString(),
+      body('format').exists({ checkFalsy: true }).isString(),
+    ],
+  };
+
+  router.post('/', loginRequiredStrictly, validators.pageBulkExport, async(req: AuthorizedRequest, res: ApiV3Response) => {
+    const errors = validationResult(req);
+    if (!errors.isEmpty()) {
+      return res.status(400).json({ errors: errors.array() });
+    }
+
+    // `format` is validated above but not consumed yet: the export service only takes the base path.
+    const { path } = req.body;
+
+    try {
+      // NOTE(review): with `?.` a missing exportService is silently treated as success.
+      await crowi.exportService?.bulkExportWithBasePagePath(path);
+
+      // NOTE(review): confirm that `res.apiv3` accepts a bare status code here;
+      // if its signature is (obj, status), this should be `res.apiv3({}, 204)`.
+      return res.apiv3(204);
+    }
+    catch (err) {
+      const msg = 'Error occurred in exporting page tree';
+      logger.error(msg, err);
+      return res.apiv3Err(new ErrorV3(msg));
+    }
+  });
+
+  return router;
+
+};

+ 1 - 1
apps/app/src/server/models/page.ts

@@ -226,7 +226,7 @@ export class PageQueryBuilder {
   /**
    * generate the query to find the pages '{path}/*' (exclude '{path}' self).
    */
-  addConditionToListOnlyDescendants(path: string, option): PageQueryBuilder {
+  addConditionToListOnlyDescendants(path: string): PageQueryBuilder {
     // exclude the target page
     this.query = this.query.and({ path: { $ne: path } });
 

+ 86 - 2
apps/app/src/server/service/export.ts

@@ -1,10 +1,15 @@
 import fs from 'fs';
 import path from 'path';
-import { Readable, Transform } from 'stream';
+import { Readable, Transform, Writable } from 'stream';
+
+import { isPopulated } from '@growi/core';
+import { normalizePath } from '@growi/core/dist/utils/path-utils';
+import archiver, { Archiver } from 'archiver';
 
 import { toArrayIfNot } from '~/utils/array-utils';
 import loggerFactory from '~/utils/logger';
 
+import { PageModel, PageDocument } from '../models/page';
 import CollectionProgress from '../models/vo/collection-progress';
 import CollectionProgressingStatus from '../models/vo/collection-progressing-status';
 
@@ -16,7 +21,6 @@ import { ZipFileStat } from './interfaces/export';
 
 const logger = loggerFactory('growi:services:ExportService'); // eslint-disable-line no-unused-vars
 
-const archiver = require('archiver');
 const mongoose = require('mongoose');
 const streamToPromise = require('stream-to-promise');
 
@@ -398,6 +402,86 @@ class ExportService {
     return readable;
   }
 
+  /**
+   * Open a cursor over the descendant pages of `basePagePath`.
+   *
+   * Each emitted page is a lean object with its `revision` populated;
+   * documents are fetched from the database in batches of 100.
+   *
+   * @param basePagePath path whose descendants are listed
+   * @returns the query cursor, usable as a readable stream of pages
+   */
+  getPageReadableStream(basePagePath: string) {
+    const pageModel = this.crowi.model('Page') as PageModel;
+
+    // restrict the base query to the pages under basePagePath
+    const descendantsBuilder = new pageModel.PageQueryBuilder(pageModel.find())
+      .addConditionToListOnlyDescendants(basePagePath);
+
+    const populatedQuery = descendantsBuilder.query.populate('revision').lean();
+
+    // get stream
+    return populatedQuery.cursor({ batchSize: 100 });
+  }
+
+  /**
+   * Create a zip archiver whose output is piped into a new file
+   * named `<epoch milliseconds>.md.zip` in this module's directory.
+   *
+   * @returns the archiver; the caller appends entries and finalizes it
+   */
+  setUpArchiver(): Archiver {
+    // the zip file name is derived from the current time
+    const zipFilePath = path.join(__dirname, `${Date.now()}.md.zip`);
+
+    const zipArchive = archiver('zip', {
+      // maximum compression
+      zlib: { level: 9 },
+    });
+
+    // warnings (stat failures and other non-blocking errors): only ENOENT is tolerated and logged
+    zipArchive.on('warning', (warning) => {
+      if (warning.code !== 'ENOENT') {
+        throw warning;
+      }
+      logger.error(warning);
+    });
+    // any archiver error is rethrown from the listener
+    zipArchive.on('error', (archiveError) => {
+      throw archiveError;
+    });
+
+    // send the archive data to the file
+    const zipFileStream = fs.createWriteStream(zipFilePath);
+    zipArchive.pipe(zipFileStream);
+
+    return zipArchive;
+  }
+
+  /**
+   * Export the descendant pages of `basePagePath` as Markdown files into a zip archive.
+   *
+   * Pages are streamed from the database and appended to the archive one by one
+   * as `<normalized page path>.md`. A page without a populated revision gets a
+   * placeholder body. The archive is finalized once every page has been appended.
+   *
+   * @param basePagePath path whose descendant pages are exported
+   * @returns a promise that settles when the archive stream has ended
+   * @throws when reading pages or appending a page to the archive fails
+   */
+  async bulkExportWithBasePagePath(basePagePath: string): Promise<void> {
+    // get descendant pages as stream
+    const pageReadableStream = this.getPageReadableStream(basePagePath);
+
+    const archive = this.setUpArchiver();
+
+    // read from pageReadableStream, then append each page to archiver
+    // pageReadableStream.pipe(pagesWritable) below will pipe the stream
+    const pagesWritable = new Writable({
+      objectMode: true,
+      write(page: PageDocument, encoding, callback) {
+        try {
+          const revision = page.revision;
+
+          let markdownBody = 'This page does not have any content.';
+          if (revision != null && isPopulated(revision)) {
+            markdownBody = revision.body;
+          }
+
+          // write to zip
+          const pathNormalized = normalizePath(page.path);
+          archive.append(markdownBody, { name: `${pathNormalized}.md` });
+        }
+        catch (err) {
+          logger.error('Error occurred while converting data to readable: ', err);
+          // report the failure through the stream instead of throwing:
+          // a throw here would leave the callback uncalled and stall the stream
+          callback(err instanceof Error ? err : new Error(String(err)));
+          return;
+        }
+
+        callback();
+      },
+      final(callback) {
+        // TODO: multi-part upload instead of calling finalize() 78070
+        archive.finalize();
+        callback();
+      },
+    });
+
+    // pipe() does not forward errors, so watch both ends of the pipe explicitly
+    const pagesConsumed = new Promise<void>((resolve, reject) => {
+      pageReadableStream.once('error', reject);
+      pagesWritable.once('error', reject);
+      pagesWritable.once('finish', resolve);
+    });
+
+    pageReadableStream.pipe(pagesWritable);
+
+    try {
+      await Promise.all([pagesConsumed, streamToPromise(archive)]);
+    }
+    catch (err) {
+      // finalize() will never be reached after a failure; stop the archiver instead of waiting forever
+      archive.abort();
+      throw err;
+    }
+  }
+
 }
 
 export default ExportService;

+ 1 - 1
apps/app/src/server/service/page/delete-completely-user-home-by-system.ts

@@ -77,7 +77,7 @@ export const deleteCompletelyUserHomeBySystem = async(userHomepagePath: string,
     // Find descendant pages with system deletion condition
     const builder = new PageQueryBuilder(Page.find(), true)
       .addConditionForSystemDeletion()
-      .addConditionToListOnlyDescendants(userHomepage.path, {});
+      .addConditionToListOnlyDescendants(userHomepage.path);
 
     // Stream processing to delete descendant pages
     // ────────┤ start │─────────

+ 14 - 0
yarn.lock

@@ -3752,6 +3752,13 @@
   dependencies:
     tslib "2.1.0"
 
+"@types/archiver@^6.0.2":
+  version "6.0.2"
+  resolved "https://registry.yarnpkg.com/@types/archiver/-/archiver-6.0.2.tgz#0daf8c83359cbde69de1e4b33dcade6a48a929e2"
+  integrity sha512-KmROQqbQzKGuaAbmK+ZcytkJ51+YqDa7NmbXjmtC5YBLSyQYo21YaUnQ3HbaPFKL1ooo6RQ6OPYPIDyxfpDDXw==
+  dependencies:
+    "@types/readdir-glob" "*"
+
 "@types/argparse@1.0.38":
   version "1.0.38"
   resolved "https://registry.yarnpkg.com/@types/argparse/-/argparse-1.0.38.tgz#a81fd8606d481f873a3800c6ebae4f1d768a56a9"
@@ -4190,6 +4197,13 @@
     "@types/scheduler" "*"
     csstype "^3.0.2"
 
+"@types/readdir-glob@*":
+  version "1.1.5"
+  resolved "https://registry.yarnpkg.com/@types/readdir-glob/-/readdir-glob-1.1.5.tgz#21a4a98898fc606cb568ad815f2a0eedc24d412a"
+  integrity sha512-raiuEPUYqXu+nvtY2Pe8s8FEmZ3x5yAH4VkLdihcPdalvsHltomrRC9BzuStrJ9yk06470hS0Crw0f1pXqD+Hg==
+  dependencies:
+    "@types/node" "*"
+
 "@types/retry@^0.12.0":
   version "0.12.0"
   resolved "https://registry.yarnpkg.com/@types/retry/-/retry-0.12.0.tgz#2b35eccfcee7d38cd72ad99232fbd58bffb3c84d"