export.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. import fs from 'fs';
  2. import path from 'path';
  3. import { Readable, Transform, Writable } from 'stream';
  4. import { isPopulated } from '@growi/core';
  5. import { normalizePath } from '@growi/core/dist/utils/path-utils';
  6. import archiver, { Archiver } from 'archiver';
  7. import { toArrayIfNot } from '~/utils/array-utils';
  8. import loggerFactory from '~/utils/logger';
  9. import { PageModel, PageDocument } from '../models/page';
  10. import CollectionProgress from '../models/vo/collection-progress';
  11. import CollectionProgressingStatus from '../models/vo/collection-progressing-status';
  12. import AppService from './app';
  13. import ConfigLoader from './config-loader';
  14. import GrowiBridgeService from './growi-bridge';
  15. import { ZipFileStat } from './interfaces/export';
  16. const logger = loggerFactory('growi:services:ExportService'); // eslint-disable-line no-unused-vars
  17. const mongoose = require('mongoose');
  18. const streamToPromise = require('stream-to-promise');
  /**
   * Progressing status of an export, holding one progress entry per collection.
   */
  class ExportProgressingStatus extends CollectionProgressingStatus {

    /**
     * Look up the document count of every tracked collection and store it as
     * that entry's totalCount, then refresh the aggregated total.
     */
    async init() {
      // all counts are requested at once and awaited together
      const countTasks = this.progressList.map(async(progress) => {
        const target = mongoose.connection.collection(progress.collectionName);
        const documentCount = await target.count();
        progress.totalCount = documentCount;
      });

      await Promise.all(countTasks);

      this.recalculateTotalCount();
    }

  }
  /**
   * Service that dumps MongoDB collections into JSON files, bundles them
   * (plus a meta.json) into a zip under `baseDir`, and reports progress
   * through the 'admin' event emitter.
   */
  class ExportService {

    crowi: any;

    appService: AppService;

    growiBridgeService: GrowiBridgeService;

    getFile: (filename: string) => string;

    // emit a progress event every `per` exported documents
    per = 100;

    zlibLevel = 9; // 0(min) - 9(max)

    // non-null only while an export is running
    currentProgressingStatus: ExportProgressingStatus | null;

    baseDir: string;

    adminEvent: any;

    constructor(crowi) {
      this.crowi = crowi;
      this.appService = crowi.appService;
      this.growiBridgeService = crowi.growiBridgeService;
      // NOTE(review): getFile is bound to this service (not to growiBridgeService),
      // presumably so that it resolves names against this.baseDir -- confirm against GrowiBridgeService
      this.getFile = this.growiBridgeService.getFile.bind(this);
      this.baseDir = path.join(crowi.tmpDir, 'downloads');
      this.adminEvent = crowi.event('admin');
      this.currentProgressingStatus = null;
    }

    /**
     * parse all zip files in downloads dir
     *
     * @memberOf ExportService
     * @return {object} info for zip files and whether currentProgressingStatus exists
     */
    async getStatus() {
      const zipFiles = fs.readdirSync(this.baseDir).filter(file => path.extname(file) === '.zip');

      // process serially so as not to waste memory:
      // each zip is parsed only after the previous one has been settled
      const zipFileStats: Array<ZipFileStat | null> = [];
      for (const file of zipFiles) {
        const zipFile = this.getFile(file);
        // eslint-disable-next-line no-await-in-loop
        const stat = await this.growiBridgeService.parseZipFile(zipFile);
        zipFileStats.push(stat);
      }

      // filter null object (broken zip)
      const filtered = zipFileStats.filter(element => element != null);

      const isExporting = this.currentProgressingStatus != null;

      return {
        zipFileStats: filtered,
        isExporting,
        progressList: isExporting ? this.currentProgressingStatus?.progressList : null,
      };
    }

    /**
     * create meta.json
     *
     * @memberOf ExportService
     * @return {string} path to meta.json
     */
    async createMetaJson(): Promise<string> {
      const metaJson = path.join(this.baseDir, this.growiBridgeService.getMetaFileName());

      const passwordSeed = this.crowi.env.PASSWORD_SEED || null;

      // gather everything (including the awaited env vars) before touching the file system
      const metaData = {
        version: this.crowi.version,
        url: this.appService.getSiteUrl(),
        passwordSeed,
        exportedAt: new Date(),
        envVars: await ConfigLoader.getEnvVarsForDisplay(),
      };

      // a single write; any I/O failure rejects the returned promise
      await fs.promises.writeFile(metaJson, JSON.stringify(metaData), { encoding: this.growiBridgeService.getEncoding() });

      return metaJson;
    }

    /**
     * create a pass-through stream that counts chunks and reports them via logProgress
     *
     * @param {CollectionProgress} exportProgress progress entry to update (may be undefined)
     * @return {Transform}
     */
    generateLogStream(exportProgress: CollectionProgress | undefined): Transform {
      const logProgress = this.logProgress.bind(this);

      let count = 0;

      return new Transform({
        transform(chunk, encoding, callback) {
          count++;
          logProgress(exportProgress, count);

          // forward the chunk untouched
          this.push(chunk);

          callback();
        },
      });
    }

    /**
     * insert beginning/ending brackets and comma separator for Json Array
     *
     * @memberOf ExportService
     * @return {Transform}
     */
    generateTransformStream(): Transform {
      let isFirst = true;

      return new Transform({
        transform(chunk, encoding, callback) {
          // opening bracket before the first element, comma before every other one
          this.push(isFirst ? '[' : ',');
          isFirst = false;

          this.push(chunk);

          callback();
        },
        flush(callback) {
          // no element was written: still emit the opening bracket so the output is '[]'
          if (isFirst) {
            this.push('[');
          }
          // closing bracket
          this.push(']');

          callback();
        },
      });
    }

    /**
     * dump a mongodb collection into json
     *
     * @memberOf ExportService
     * @param {string} collectionName collection name
     * @return {string} path to the json file written
     */
    async exportCollectionToJson(collectionName: string): Promise<string> {
      const collection = mongoose.connection.collection(collectionName);

      const nativeCursor = collection.find();
      const readStream = nativeCursor.stream({ transform: JSON.stringify });

      // get TransformStream
      const transformStream = this.generateTransformStream();

      // log configuration
      const exportProgress = this.currentProgressingStatus?.progressMap[collectionName];
      const logStream = this.generateLogStream(exportProgress);

      // create WritableStream
      const jsonFileToWrite = path.join(this.baseDir, `${collectionName}.json`);
      const writeStream = fs.createWriteStream(jsonFileToWrite, { encoding: this.growiBridgeService.getEncoding() });

      await new Promise<void>((resolve, reject) => {
        const stages = [readStream, logStream, transformStream, writeStream];

        // pipe() does not forward errors to the next stage, so every stage is watched;
        // on the first failure the whole chain is torn down and the export rejects
        const abort = (err: Error) => {
          stages.forEach(stage => stage.destroy());
          reject(err);
        };
        stages.forEach(stage => stage.on('error', abort));

        writeStream.once('finish', () => resolve());

        readStream
          .pipe(logStream)
          .pipe(transformStream)
          .pipe(writeStream);
      });

      return writeStream.path.toString();
    }

    /**
     * export multiple Collections into json and Zip
     *
     * @memberOf ExportService
     * @param {Array.<string>} collections array of collection name
     * @return {ZipFileStat} info of zip file created
     */
    async exportCollectionsToZippedJson(collections: string[]): Promise<ZipFileStat | null> {
      const metaJson = await this.createMetaJson();

      // process serially so as not to waste memory:
      // the next collection is started only after the previous dump has finished
      const jsonFiles: string[] = [];
      for (const collectionName of collections) {
        // eslint-disable-next-line no-await-in-loop
        const jsonFile = await this.exportCollectionToJson(collectionName);
        jsonFiles.push(jsonFile);
      }

      // send start zipping event
      this.emitStartZippingEvent();

      // zip json
      const configs = jsonFiles.map((jsonFile) => { return { from: jsonFile, as: path.basename(jsonFile) } });
      // add meta.json in zip
      configs.push({ from: metaJson, as: path.basename(metaJson) });
      // exec zip
      const zipFile = await this.zipFiles(configs);

      // get stats for the zip file
      const addedZipFileStat = await this.growiBridgeService.parseZipFile(zipFile);

      // send terminate event
      this.emitTerminateEvent(addedZipFileStat);

      return addedZipFileStat;

      // TODO: remove broken zip file
    }

    /**
     * run an export of the given collections; only one export may run at a time
     *
     * @param {Array.<string>} collections array of collection name
     * @return {ZipFileStat} info of zip file created
     * @throws {Error} when another export is already in progress
     */
    async export(collections: string[]): Promise<ZipFileStat | null> {
      if (this.currentProgressingStatus != null) {
        throw new Error('There is an exporting process running.');
      }

      this.currentProgressingStatus = new ExportProgressingStatus(collections);

      try {
        // init() is inside the try so that a failure here also releases the "running" flag
        await this.currentProgressingStatus.init();

        return await this.exportCollectionsToZippedJson(collections);
      }
      finally {
        this.currentProgressingStatus = null;
      }
    }

    /**
     * log export progress
     *
     * @memberOf ExportService
     *
     * @param {CollectionProgress} collectionProgress
     * @param {number} currentCount number of items exported
     */
    logProgress(collectionProgress: CollectionProgress | undefined, currentCount: number): void {
      if (collectionProgress == null) return;

      const output = `${collectionProgress.collectionName}: ${currentCount}/${collectionProgress.totalCount} written`;

      // update exportProgress.currentCount
      collectionProgress.currentCount = currentCount;

      // output every this.per items
      if (currentCount % this.per === 0) {
        logger.debug(output);
        this.emitProgressEvent();
      }
      // output last item
      else if (currentCount === collectionProgress.totalCount) {
        logger.info(output);
        this.emitProgressEvent();
      }
    }

    /**
     * emit progress event
     */
    emitProgressEvent(): void {
      const data = {
        currentCount: this.currentProgressingStatus?.currentCount,
        totalCount: this.currentProgressingStatus?.totalCount,
        progressList: this.currentProgressingStatus?.progressList,
      };

      // send event (in progress in global)
      this.adminEvent.emit('onProgressForExport', data);
    }

    /**
     * emit start zipping event
     */
    emitStartZippingEvent(): void {
      this.adminEvent.emit('onStartZippingForExport', {});
    }

    /**
     * emit terminate event
     * @param {object} zipFileStat added zip file status data
     */
    emitTerminateEvent(zipFileStat: ZipFileStat | null): void {
      this.adminEvent.emit('onTerminateForExport', { addedZipFileStat: zipFileStat });
    }

    /**
     * zip files into one zip file
     *
     * @memberOf ExportService
     * @param {object|array<object>} configs object or array of object { from: "path to source file", as: "file name after unzipped" }
     * @return {string} absolute path to the zip file
     * @see https://www.archiverjs.com/#quick-start
     */
    async zipFiles(_configs: {from: string, as: string}[]): Promise<string> {
      const configs = toArrayIfNot(_configs);
      const appTitle = this.appService.getAppTitle();
      const timeStamp = (new Date()).getTime();
      const zipFile = path.join(this.baseDir, `${appTitle}-${timeStamp}.growi.zip`);

      const archive = archiver('zip', {
        zlib: { level: this.zlibLevel },
      });
      const output = fs.createWriteStream(zipFile);

      await new Promise<void>((resolve, reject) => {
        // good practice to catch warnings (ie stat failures and other non-blocking errors)
        archive.on('warning', (err) => {
          if (err.code === 'ENOENT') logger.error(err);
          // reject instead of throwing: a throw inside an event handler cannot be caught by the caller
          else reject(err);
        });

        // good practice to catch this error explicitly
        archive.on('error', reject);
        output.on('error', reject);

        // the zip is complete only once the output file has been closed
        output.on('close', () => resolve());

        for (const { from, as } of configs) {
          const input = fs.createReadStream(from);

          // append a file from stream
          archive.append(input, { name: as });
        }

        // pipe archive data to the file
        archive.pipe(output);

        // finalize the archive (ie we are done appending files but streams have to finish yet)
        // listeners are registered above because events may fire right after this call
        Promise.resolve(archive.finalize()).catch(reject);
      });

      logger.info(`zipped GROWI data into ${zipFile} (${archive.pointer()} bytes)`);

      // delete json files
      for (const { from } of configs) {
        fs.unlinkSync(from);
      }

      return zipFile;
    }

    /**
     * wrap the body of a revision in a readable stream
     *
     * @param revision object whose `body` becomes the stream content
     * @param format currently not used by this method
     * @return {Readable} stream that emits the body and then ends
     */
    getReadStreamFromRevision(revision, format): Readable {
      const data = revision.body;

      const readable = new Readable();
      // nothing to pull lazily: the whole body is pushed up front
      readable._read = () => {};
      readable.push(data);
      readable.push(null);

      return readable;
    }

  }
  /**
   * Singleton instance of ExportService; undefined until instanciate() has been called.
   */
  // eslint-disable-next-line import/no-mutable-exports
  export let exportService: ExportService | undefined;

  /**
   * Build an ExportService from the given crowi object and publish it
   * through the exported `exportService` binding.
   */
  export default function instanciate(crowi: any): void {
    const service = new ExportService(crowi);
    exportService = service;
  }