// export.ts
  1. import archiver from 'archiver';
  2. import fs from 'fs';
  3. import path from 'path';
  4. import { Readable, Transform } from 'stream';
  5. import { toArrayIfNot } from '~/utils/array-utils';
  6. import { getGrowiVersion } from '~/utils/growi-version';
  7. import loggerFactory from '~/utils/logger';
  8. import type CollectionProgress from '../models/vo/collection-progress';
  9. import CollectionProgressingStatus from '../models/vo/collection-progressing-status';
  10. import type AppService from './app';
  11. import { configManager } from './config-manager';
  12. import type { GrowiBridgeService } from './growi-bridge';
  13. import { growiInfoService } from './growi-info';
  14. import type { ZipFileStat } from './interfaces/export';
  15. const logger = loggerFactory('growi:services:ExportService');
  16. const { pipeline, finished } = require('stream/promises');
  17. const mongoose = require('mongoose');
  /**
   * Progress tracker used while an export is running.
   *
   * Extends CollectionProgressingStatus and adds the ability to look up how
   * many documents each tracked collection holds before the export starts.
   */
  class ExportProgressingStatus extends CollectionProgressingStatus {
    /**
     * Populate `totalCount` of every entry in `progressList` from MongoDB,
     * then ask the base class to recompute the aggregated total.
     *
     * The per-collection lookups are independent, so they run in parallel.
     */
    async init() {
      const promises = this.progressList.map(async (collectionProgress) => {
        const collection = mongoose.connection.collection(
          collectionProgress.collectionName,
        );
        // Collection#count() is deprecated in the MongoDB driver;
        // estimatedDocumentCount() is the replacement for an unfiltered,
        // metadata-based count of the whole collection.
        collectionProgress.totalCount =
          await collection.estimatedDocumentCount();
      });
      await Promise.all(promises);

      this.recalculateTotalCount();
    }
  }
  /**
   * Service that dumps MongoDB collections to JSON files and bundles them,
   * together with a meta.json, into a single zip file under
   * `<crowi.tmpDir>/downloads`. Progress is reported through the admin event
   * emitter (`crowi.events.admin`).
   */
  class ExportService {
    crowi: any;

    appService: AppService;

    growiBridgeService: GrowiBridgeService;

    /** Emit a progress event every `per` exported documents. */
    per = 100;

    /** zlib compression level: 0 (min) - 9 (max). */
    zlibLevel = 9;

    /** Non-null only while an export is running. */
    currentProgressingStatus: ExportProgressingStatus | null;

    /** Directory in which intermediate JSON files and zip files are written. */
    baseDir: string;

    adminEvent: any;

    constructor(crowi: any) {
      this.crowi = crowi;
      this.appService = crowi.appService;
      this.growiBridgeService = crowi.growiBridgeService;
      this.baseDir = path.join(crowi.tmpDir, 'downloads');
      this.adminEvent = crowi.events.admin;
      this.currentProgressingStatus = null;
    }

    /**
     * Resolve a file name against the downloads directory.
     * Delegates to `growiBridgeService.getFile`.
     *
     * @param fileName name of a file inside `baseDir`
     * @returns whatever `growiBridgeService.getFile` returns (the path to the file)
     */
    getFile(fileName: string) {
      return this.growiBridgeService.getFile(fileName, this.baseDir);
    }

    /**
     * Parse all zip files in the downloads dir and report the export state.
     *
     * @returns stats of every parsable zip file, whether an export is currently
     *          running, and (only while exporting) the per-collection progress list
     */
    async getStatus() {
      const zipFiles = fs
        .readdirSync(this.baseDir)
        .filter((file) => path.extname(file) === '.zip');

      // Process serially so as not to waste memory: each zip is parsed only
      // after the previous one has finished (mapping to promises up front would
      // start all of them at once).
      const zipFileStats: ZipFileStat[] = [];
      for (const file of zipFiles) {
        const zipFile = this.getFile(file);
        const stat = await this.growiBridgeService.parseZipFile(zipFile);
        // a null stat means the zip could not be parsed (broken zip) -- skip it
        if (stat != null) {
          zipFileStats.push(stat);
        }
      }

      const status = this.currentProgressingStatus;
      const isExporting = status != null;

      return {
        zipFileStats,
        isExporting,
        progressList: status != null ? status.progressList : null,
      };
    }

    /**
     * Create meta.json in the downloads dir.
     *
     * The file records the GROWI version, site URL, password seed, export time
     * and the managed environment variables.
     *
     * @returns path to meta.json
     */
    async createMetaJson(): Promise<string> {
      const metaJson = path.join(
        this.baseDir,
        this.growiBridgeService.getMetaFileName(),
      );
      const writeStream = fs.createWriteStream(metaJson, {
        encoding: this.growiBridgeService.getEncoding(),
      });
      const passwordSeed = this.crowi.env.PASSWORD_SEED || null;

      const metaData = {
        version: getGrowiVersion(),
        url: growiInfoService.getSiteUrl(),
        passwordSeed,
        exportedAt: new Date(),
        envVars: configManager.getManagedEnvVars(),
      };

      // end() writes the final chunk and finishes the stream in one step
      writeStream.end(JSON.stringify(metaData));
      await finished(writeStream);

      return metaJson;
    }

    /**
     * Build a pass-through stream that counts the chunks flowing through it and
     * reports each count via `logProgress`.
     *
     * @param exportProgress progress entry to update; nothing is reported when undefined
     * @returns a Transform that forwards every chunk unchanged
     */
    generateLogStream(exportProgress: CollectionProgress | undefined): Transform {
      const logProgress = this.logProgress.bind(this);
      let count = 0;

      return new Transform({
        transform(chunk, encoding, callback) {
          count++;
          logProgress(exportProgress, count);

          this.push(chunk);
          callback();
        },
      });
    }

    /**
     * Build a stream that frames its input chunks as a JSON array: it inserts
     * the opening bracket, comma separators and the closing bracket.
     * An input with no chunks produces `[]`.
     *
     * @returns the framing Transform
     */
    generateTransformStream(): Transform {
      let isFirst = true;

      return new Transform({
        transform(chunk, encoding, callback) {
          // opening bracket before the first element, separator before the others
          this.push(isFirst ? '[' : ',');
          isFirst = false;

          this.push(chunk);
          callback();
        },
        final(callback) {
          // no element was written: the opening bracket is still missing
          if (isFirst) {
            this.push('[');
          }
          // closing bracket
          this.push(']');
          callback();
        },
      });
    }

    /**
     * Dump a MongoDB collection into `<baseDir>/<collectionName>.json`.
     *
     * Documents are streamed from a cursor, stringified one by one, counted for
     * progress reporting, framed as a JSON array and written to disk.
     *
     * @param collectionName collection name
     * @returns path to the JSON file that was written
     */
    async exportCollectionToJson(collectionName: string): Promise<string> {
      const collection = mongoose.connection.collection(collectionName);

      const nativeCursor = collection.find();
      const readStream = nativeCursor.stream({ transform: JSON.stringify });

      // JSON array framing
      const transformStream = this.generateTransformStream();

      // progress logging (no-op when this collection is not being tracked)
      const exportProgress =
        this.currentProgressingStatus?.progressMap[collectionName];
      const logStream = this.generateLogStream(exportProgress);

      // destination file
      const jsonFileToWrite = path.join(this.baseDir, `${collectionName}.json`);
      const writeStream = fs.createWriteStream(jsonFileToWrite, {
        encoding: this.growiBridgeService.getEncoding(),
      });

      await pipeline(readStream, logStream, transformStream, writeStream);

      return jsonFileToWrite;
    }

    /**
     * Export multiple collections into JSON files and zip them together with
     * meta.json.
     *
     * Emits 'onStartZippingForExport' once all JSON files are written and
     * 'onTerminateForExport' once the zip file has been created and parsed.
     *
     * @param collections array of collection names
     * @returns info of the zip file created (null when it could not be parsed)
     */
    async exportCollectionsToZippedJson(
      collections: string[],
    ): Promise<ZipFileStat | null> {
      const metaJson = await this.createMetaJson();

      // Process serially so as not to waste memory: a collection is exported
      // only after the previous one has been completely written.
      const jsonFiles: string[] = [];
      for (const collectionName of collections) {
        jsonFiles.push(await this.exportCollectionToJson(collectionName));
      }

      // send start-zipping event
      this.emitStartZippingEvent();

      // zip json files, with meta.json added last
      const configs = jsonFiles.map((jsonFile) => {
        return { from: jsonFile, as: path.basename(jsonFile) };
      });
      configs.push({ from: metaJson, as: path.basename(metaJson) });

      // exec zip
      const zipFile = await this.zipFiles(configs);

      // get stats for the zip file
      const addedZipFileStat =
        await this.growiBridgeService.parseZipFile(zipFile);

      // send terminate event
      this.emitTerminateEvent(addedZipFileStat);

      // TODO: remove broken zip file

      return addedZipFileStat;
    }

    /**
     * Run an export of the given collections.
     *
     * Only one export may run at a time; the running state is always cleared
     * when this method settles, including when initialization fails.
     *
     * @param collections array of collection names
     * @returns info of the zip file created
     * @throws Error when another export is already running
     */
    async export(collections: string[]): Promise<ZipFileStat | null> {
      if (this.currentProgressingStatus != null) {
        throw new Error('There is an exporting process running.');
      }

      const status = new ExportProgressingStatus(collections);
      this.currentProgressingStatus = status;

      try {
        // init() is inside the try block so that a failure while counting
        // documents does not leave the service locked in the "exporting" state
        await status.init();
        return await this.exportCollectionsToZippedJson(collections);
      } finally {
        this.currentProgressingStatus = null;
      }
    }

    /**
     * Record export progress for one collection and, at reporting points, log
     * it and emit a progress event.
     *
     * Reporting points are every `this.per` items (debug level) and the last
     * item of the collection (info level).
     *
     * @param collectionProgress progress entry to update; ignored when null/undefined
     * @param currentCount number of items exported so far
     */
    logProgress(
      collectionProgress: CollectionProgress | undefined,
      currentCount: number,
    ): void {
      if (collectionProgress == null) return;

      // update exportProgress.currentCount
      collectionProgress.currentCount = currentCount;

      const isPerBoundary = currentCount % this.per === 0;
      const isLastItem = currentCount === collectionProgress.totalCount;
      if (!isPerBoundary && !isLastItem) return;

      // build the message only when it is actually going to be logged
      const output = `${collectionProgress.collectionName}: ${currentCount}/${collectionProgress.totalCount} written`;
      if (isPerBoundary) {
        logger.debug(output);
      } else {
        logger.info(output);
      }
      this.emitProgressEvent();
    }

    /**
     * Emit 'onProgressForExport' with the overall counts and the
     * per-collection progress list of the running export.
     */
    emitProgressEvent(): void {
      const data = {
        currentCount: this.currentProgressingStatus?.currentCount,
        totalCount: this.currentProgressingStatus?.totalCount,
        progressList: this.currentProgressingStatus?.progressList,
      };

      // send event (in progress in global)
      this.adminEvent.emit('onProgressForExport', data);
    }

    /**
     * Emit 'onStartZippingForExport'.
     */
    emitStartZippingEvent(): void {
      this.adminEvent.emit('onStartZippingForExport', {});
    }

    /**
     * Emit 'onTerminateForExport'.
     *
     * @param zipFileStat added zip file status data
     */
    emitTerminateEvent(zipFileStat: ZipFileStat | null): void {
      this.adminEvent.emit('onTerminateForExport', {
        addedZipFileStat: zipFileStat,
      });
    }

    /**
     * Zip files into one zip file named `<appTitle>-<timestamp>.growi.zip` in
     * the downloads dir, then delete the source files.
     *
     * Archive failures reject the returned promise instead of being thrown from
     * inside an event listener (which would surface as an uncaught exception).
     *
     * @param _configs object or array of objects
     *        { from: "path to source file", as: "file name after unzipped" }
     * @returns path to the zip file
     * @see https://www.archiverjs.com/#quick-start
     */
    async zipFiles(_configs: { from: string; as: string }[]): Promise<string> {
      const configs = toArrayIfNot(_configs);
      const appTitle = this.appService.getAppTitle();
      const timeStamp = new Date().getTime();
      const zipFile = path.join(
        this.baseDir,
        `${appTitle}-${timeStamp}.growi.zip`,
      );

      const archive = archiver('zip', {
        zlib: { level: this.zlibLevel },
      });

      // warnings are non-blocking errors (e.g. stat failures): a missing file
      // is only logged, anything else aborts the archive so that the pipeline
      // below rejects with that error
      archive.on('warning', (err) => {
        if (err.code === 'ENOENT') logger.error(err);
        else archive.destroy(err);
      });

      for (const { from, as } of configs) {
        // append a file from stream
        archive.append(fs.createReadStream(from), { name: as });
      }

      const output = fs.createWriteStream(zipFile);

      // Start piping first so that error/close listeners are registered before
      // finalize() can fire any event; pipeline() rejects on an 'error' emitted
      // by either stream.
      const piped = pipeline(archive, output);
      // finalize the archive (ie we are done appending files but streams have to finish yet)
      const finalized = archive.finalize();
      await Promise.all([piped, finalized]);

      logger.info(
        `zipped GROWI data into ${zipFile} (${archive.pointer()} bytes)`,
      );

      // delete json files
      for (const { from } of configs) {
        fs.unlinkSync(from);
      }

      return zipFile;
    }

    /**
     * Wrap the body of a revision in a readable stream that emits the body and
     * then ends.
     *
     * @param revision object whose `body` is pushed into the stream
     * @param format currently unused
     * @returns the readable stream
     */
    getReadStreamFromRevision(revision, format): Readable {
      // all data is pushed up front, so there is nothing to do on read
      const readable = new Readable({ read() {} });
      readable.push(revision.body);
      readable.push(null);

      return readable;
    }
  }
  351. export let exportService: ExportService | undefined; // singleton instance
  352. export default function instanciate(crowi: any): void {
  353. exportService = new ExportService(crowi);
  354. }