export.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. import fs from 'fs';
  2. import path from 'path';
  3. import { Readable, Transform } from 'stream';
  4. import archiver from 'archiver';
  5. import { toArrayIfNot } from '~/utils/array-utils';
  6. import { getGrowiVersion } from '~/utils/growi-version';
  7. import loggerFactory from '~/utils/logger';
  8. import type CollectionProgress from '../models/vo/collection-progress';
  9. import CollectionProgressingStatus from '../models/vo/collection-progressing-status';
  10. import type AppService from './app';
  11. import { configManager } from './config-manager';
  12. import type { GrowiBridgeService } from './growi-bridge';
  13. import { growiInfoService } from './growi-info';
  14. import type { ZipFileStat } from './interfaces/export';
  15. const logger = loggerFactory('growi:services:ExportService');
  16. const { pipeline, finished } = require('stream/promises');
  17. const mongoose = require('mongoose');
  18. class ExportProgressingStatus extends CollectionProgressingStatus {
  19. async init() {
  20. // retrieve total document count from each collections
  21. const promises = this.progressList.map(async(collectionProgress) => {
  22. const collection = mongoose.connection.collection(collectionProgress.collectionName);
  23. collectionProgress.totalCount = await collection.count();
  24. });
  25. await Promise.all(promises);
  26. this.recalculateTotalCount();
  27. }
  28. }
  29. class ExportService {
  30. crowi: any;
  31. appService: AppService;
  32. growiBridgeService: GrowiBridgeService;
  33. per = 100;
  34. zlibLevel = 9; // 0(min) - 9(max)
  35. currentProgressingStatus: ExportProgressingStatus | null;
  36. baseDir: string;
  37. adminEvent: any;
  38. constructor(crowi) {
  39. this.crowi = crowi;
  40. this.appService = crowi.appService;
  41. this.growiBridgeService = crowi.growiBridgeService;
  42. this.baseDir = path.join(crowi.tmpDir, 'downloads');
  43. this.adminEvent = crowi.event('admin');
  44. this.currentProgressingStatus = null;
  45. }
  46. /**
  47. *
  48. * @param {string} fileName
  49. * @returns {string} path to the file
  50. */
  51. getFile(fileName) {
  52. return this.growiBridgeService.getFile(fileName, this.baseDir);
  53. }
  54. /**
  55. * parse all zip files in downloads dir
  56. *
  57. * @memberOf ExportService
  58. * @return {object} info for zip files and whether currentProgressingStatus exists
  59. */
  60. async getStatus() {
  61. const zipFiles = fs.readdirSync(this.baseDir).filter(file => path.extname(file) === '.zip');
  62. // process serially so as not to waste memory
  63. const zipFileStats: Array<ZipFileStat | null> = [];
  64. const parseZipFilePromises = zipFiles.map((file) => {
  65. const zipFile = this.getFile(file);
  66. return this.growiBridgeService.parseZipFile(zipFile);
  67. });
  68. for await (const stat of parseZipFilePromises) {
  69. zipFileStats.push(stat);
  70. }
  71. // filter null object (broken zip)
  72. const filtered = zipFileStats.filter(element => element != null);
  73. const isExporting = this.currentProgressingStatus != null;
  74. return {
  75. zipFileStats: filtered,
  76. isExporting,
  77. progressList: isExporting ? this.currentProgressingStatus?.progressList : null,
  78. };
  79. }
  80. /**
  81. * create meta.json
  82. *
  83. * @memberOf ExportService
  84. * @return {string} path to meta.json
  85. */
  86. async createMetaJson(): Promise<string> {
  87. const metaJson = path.join(this.baseDir, this.growiBridgeService.getMetaFileName());
  88. const writeStream = fs.createWriteStream(metaJson, { encoding: this.growiBridgeService.getEncoding() });
  89. const passwordSeed = this.crowi.env.PASSWORD_SEED || null;
  90. const metaData = {
  91. version: getGrowiVersion(),
  92. url: growiInfoService.getSiteUrl(),
  93. passwordSeed,
  94. exportedAt: new Date(),
  95. envVars: configManager.getManagedEnvVars(),
  96. };
  97. writeStream.write(JSON.stringify(metaData));
  98. writeStream.close();
  99. await finished(writeStream);
  100. return metaJson;
  101. }
  102. /**
  103. *
  104. * @param {ExportProgress} exportProgress
  105. * @return {Transform}
  106. */
  107. generateLogStream(exportProgress: CollectionProgress | undefined): Transform {
  108. const logProgress = this.logProgress.bind(this);
  109. let count = 0;
  110. return new Transform({
  111. transform(chunk, encoding, callback) {
  112. count++;
  113. logProgress(exportProgress, count);
  114. this.push(chunk);
  115. callback();
  116. },
  117. });
  118. }
  119. /**
  120. * insert beginning/ending brackets and comma separator for Json Array
  121. *
  122. * @memberOf ExportService
  123. * @return {Transform}
  124. */
  125. generateTransformStream(): Transform {
  126. let isFirst = true;
  127. const transformStream = new Transform({
  128. transform(chunk, encoding, callback) {
  129. // write beginning brace
  130. if (isFirst) {
  131. this.push('[');
  132. isFirst = false;
  133. }
  134. // write separator
  135. else {
  136. this.push(',');
  137. }
  138. this.push(chunk);
  139. callback();
  140. },
  141. final(callback) {
  142. // write beginning brace
  143. if (isFirst) {
  144. this.push('[');
  145. }
  146. // write ending brace
  147. this.push(']');
  148. callback();
  149. },
  150. });
  151. return transformStream;
  152. }
  153. /**
  154. * dump a mongodb collection into json
  155. *
  156. * @memberOf ExportService
  157. * @param {string} collectionName collection name
  158. * @return {string} path to zip file
  159. */
  160. async exportCollectionToJson(collectionName: string): Promise<string> {
  161. const collection = mongoose.connection.collection(collectionName);
  162. const nativeCursor = collection.find();
  163. const readStream = nativeCursor.stream({ transform: JSON.stringify });
  164. // get TransformStream
  165. const transformStream = this.generateTransformStream();
  166. // log configuration
  167. const exportProgress = this.currentProgressingStatus?.progressMap[collectionName];
  168. const logStream = this.generateLogStream(exportProgress);
  169. // create WritableStream
  170. const jsonFileToWrite = path.join(this.baseDir, `${collectionName}.json`);
  171. const writeStream = fs.createWriteStream(jsonFileToWrite, { encoding: this.growiBridgeService.getEncoding() });
  172. await pipeline(readStream, logStream, transformStream, writeStream);
  173. return writeStream.path.toString();
  174. }
  175. /**
  176. * export multiple Collections into json and Zip
  177. *
  178. * @memberOf ExportService
  179. * @param {Array.<string>} collections array of collection name
  180. * @return {Array.<ZipFileStat>} info of zip file created
  181. */
  182. async exportCollectionsToZippedJson(collections: string[]): Promise<ZipFileStat | null> {
  183. const metaJson = await this.createMetaJson();
  184. // process serially so as not to waste memory
  185. const jsonFiles: string[] = [];
  186. const jsonFilesPromises = collections.map(collectionName => this.exportCollectionToJson(collectionName));
  187. for await (const jsonFile of jsonFilesPromises) {
  188. jsonFiles.push(jsonFile);
  189. }
  190. // send terminate event
  191. this.emitStartZippingEvent();
  192. // zip json
  193. const configs = jsonFiles.map((jsonFile) => { return { from: jsonFile, as: path.basename(jsonFile) } });
  194. // add meta.json in zip
  195. configs.push({ from: metaJson, as: path.basename(metaJson) });
  196. // exec zip
  197. const zipFile = await this.zipFiles(configs);
  198. // get stats for the zip file
  199. const addedZipFileStat = await this.growiBridgeService.parseZipFile(zipFile);
  200. // send terminate event
  201. this.emitTerminateEvent(addedZipFileStat);
  202. return addedZipFileStat;
  203. // TODO: remove broken zip file
  204. }
  205. async export(collections: string[]): Promise<ZipFileStat | null> {
  206. if (this.currentProgressingStatus != null) {
  207. throw new Error('There is an exporting process running.');
  208. }
  209. this.currentProgressingStatus = new ExportProgressingStatus(collections);
  210. await this.currentProgressingStatus.init();
  211. let zipFileStat: ZipFileStat | null;
  212. try {
  213. zipFileStat = await this.exportCollectionsToZippedJson(collections);
  214. }
  215. finally {
  216. this.currentProgressingStatus = null;
  217. }
  218. return zipFileStat;
  219. }
  220. /**
  221. * log export progress
  222. *
  223. * @memberOf ExportService
  224. *
  225. * @param {CollectionProgress} collectionProgress
  226. * @param {number} currentCount number of items exported
  227. */
  228. logProgress(collectionProgress: CollectionProgress | undefined, currentCount: number): void {
  229. if (collectionProgress == null) return;
  230. const output = `${collectionProgress.collectionName}: ${currentCount}/${collectionProgress.totalCount} written`;
  231. // update exportProgress.currentCount
  232. collectionProgress.currentCount = currentCount;
  233. // output every this.per items
  234. if (currentCount % this.per === 0) {
  235. logger.debug(output);
  236. this.emitProgressEvent();
  237. }
  238. // output last item
  239. else if (currentCount === collectionProgress.totalCount) {
  240. logger.info(output);
  241. this.emitProgressEvent();
  242. }
  243. }
  244. /**
  245. * emit progress event
  246. */
  247. emitProgressEvent(): void {
  248. const data = {
  249. currentCount: this.currentProgressingStatus?.currentCount,
  250. totalCount: this.currentProgressingStatus?.totalCount,
  251. progressList: this.currentProgressingStatus?.progressList,
  252. };
  253. // send event (in progress in global)
  254. this.adminEvent.emit('onProgressForExport', data);
  255. }
  256. /**
  257. * emit start zipping event
  258. */
  259. emitStartZippingEvent(): void {
  260. this.adminEvent.emit('onStartZippingForExport', {});
  261. }
  262. /**
  263. * emit terminate event
  264. * @param {object} zipFileStat added zip file status data
  265. */
  266. emitTerminateEvent(zipFileStat: ZipFileStat | null): void {
  267. this.adminEvent.emit('onTerminateForExport', { addedZipFileStat: zipFileStat });
  268. }
  269. /**
  270. * zip files into one zip file
  271. *
  272. * @memberOf ExportService
  273. * @param {object|array<object>} configs object or array of object { from: "path to source file", as: "file name after unzipped" }
  274. * @return {string} absolute path to the zip file
  275. * @see https://www.archiverjs.com/#quick-start
  276. */
  277. async zipFiles(_configs: {from: string, as: string}[]): Promise<string> {
  278. const configs = toArrayIfNot(_configs);
  279. const appTitle = this.appService.getAppTitle();
  280. const timeStamp = (new Date()).getTime();
  281. const zipFile = path.join(this.baseDir, `${appTitle}-${timeStamp}.growi.zip`);
  282. const archive = archiver('zip', {
  283. zlib: { level: this.zlibLevel },
  284. });
  285. // good practice to catch warnings (ie stat failures and other non-blocking errors)
  286. archive.on('warning', (err) => {
  287. if (err.code === 'ENOENT') logger.error(err);
  288. else throw err;
  289. });
  290. // good practice to catch this error explicitly
  291. archive.on('error', (err) => { throw err });
  292. for (const { from, as } of configs) {
  293. const input = fs.createReadStream(from);
  294. // append a file from stream
  295. archive.append(input, { name: as });
  296. }
  297. const output = fs.createWriteStream(zipFile);
  298. // finalize the archive (ie we are done appending files but streams have to finish yet)
  299. // 'close', 'end' or 'finish' may be fired right after calling this method so register to them beforehand
  300. archive.finalize();
  301. // pipe archive data to the file
  302. await pipeline(archive, output);
  303. logger.info(`zipped GROWI data into ${zipFile} (${archive.pointer()} bytes)`);
  304. // delete json files
  305. for (const { from } of configs) {
  306. fs.unlinkSync(from);
  307. }
  308. return zipFile;
  309. }
  310. getReadStreamFromRevision(revision, format): Readable {
  311. const data = revision.body;
  312. const readable = new Readable();
  313. readable._read = () => {};
  314. readable.push(data);
  315. readable.push(null);
  316. return readable;
  317. }
  318. }
  319. // eslint-disable-next-line import/no-mutable-exports
  320. export let exportService: ExportService | undefined; // singleton instance
  321. export default function instanciate(crowi: any): void {
  322. exportService = new ExportService(crowi);
  323. }