import.js 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. const logger = require('@alias/logger')('growi:services:ImportService'); // eslint-disable-line no-unused-vars
  2. const fs = require('fs');
  3. const path = require('path');
  4. const JSONStream = require('JSONStream');
  5. const streamToPromise = require('stream-to-promise');
  6. const unzipper = require('unzipper');
  7. const { ObjectId } = require('mongoose').Types;
  /**
   * Service that restores collections from exported JSON files.
   *
   * Typical flow: `unzip()` an archive into `baseDir`, `validate()` its meta data,
   * then `import()` each extracted JSON file into the matching mongoose model.
   */
  class ImportService {

    /**
     * @param {object} crowi application context; `tmpDir`, `models` and `version` are read from it
     */
    constructor(crowi) {
      this.crowi = crowi;
      this.baseDir = path.join(crowi.tmpDir, 'imports');
      this.metaFileName = 'meta.json';
      this.encoding = 'utf-8';
      // number of documents sent to the database per bulk operation
      this.per = 100;

      // keepOriginal is stored in convertMap and called detached from the instance
      this.keepOriginal = this.keepOriginal.bind(this);

      // { pages: Page, users: User, ... }
      this.collectionMap = {};
      this.initCollectionMap(crowi.models);

      // { pages: { _id: ..., path: ..., ...}, users: { _id: ..., username: ..., }, ... }
      this.convertMap = {};
      this.initConvertMap(crowi.models);
    }

    /**
     * initialize collection map (collection name => model)
     *
     * @memberOf ImportService
     * @param {object} models from models/index.js
     */
    initCollectionMap(models) {
      for (const model of Object.values(models)) {
        this.collectionMap[model.collection.collectionName] = model;
      }
    }

    /**
     * initialize convert map. set keepOriginal as default for every schema path
     *
     * @memberOf ImportService
     * @param {object} models from models/index.js
     */
    initConvertMap(models) {
      // by default, original value is used for imported documents
      for (const model of Object.values(models)) {
        const { collectionName } = model.collection;
        const converters = {};

        for (const key of Object.keys(model.schema.paths)) {
          converters[key] = this.keepOriginal;
        }

        this.convertMap[collectionName] = converters;
      }
    }

    /**
     * keep original value
     * automatically convert ObjectId
     *
     * @memberOf ImportService
     * @param {any} _value value from imported document
     * @param {{ _document: object, schema: object, key: string }} context
     *   `schema` is the model's `schema.paths`, `key` is the path being converted
     *   (`_document` is accepted for signature parity with other converters but not used here)
     * @return {any} new value for the document
     */
    keepOriginal(_value, { schema, key }) {
      // a key without a schema entry (e.g. coming from overwriteParams) is passed through untouched
      const schemaType = schema[key];
      if (schemaType == null) {
        return _value;
      }

      // NOTE(review): 'ObjectID' is what the original code checked; 'ObjectId' is accepted as well
      // because newer mongoose releases appear to use that spelling -- confirm against the installed version
      const isObjectIdPath = schemaType.instance === 'ObjectID' || schemaType.instance === 'ObjectId';

      if (isObjectIdPath && ObjectId.isValid(_value)) {
        // `new` is required when ObjectId is an ES class
        return new ObjectId(_value);
      }

      return _value;
    }

    /**
     * import a collection from json
     *
     * Documents are streamed out of the file and inserted in batches of `this.per`.
     * The returned promise settles only after the last batch has been executed;
     * the json file is removed once everything succeeded.
     *
     * @memberOf ImportService
     * @param {object} Model instance of mongoose model
     * @param {string} jsonFile absolute path to the jsonFile being imported
     * @param {object} overwriteParams overwrite each document with unrelated value. e.g. { creator: req.user }
     */
    async import(Model, jsonFile, overwriteParams = {}) {
      const { collectionName } = Model.collection;

      let nInsertedTotal = 0;
      const failedIds = [];

      let unorderedBulkOp = Model.collection.initializeUnorderedBulkOp();
      // documents queued in the current bulk op. tracked here so that emptiness
      // does not have to be derived from the driver's private state
      let nQueued = 0;
      // promise of the batch currently being executed (already resolved when idle)
      let inFlight = Promise.resolve();

      // execute the queued documents and start a fresh bulk op
      const flush = async() => {
        const bulkOp = unorderedBulkOp;
        unorderedBulkOp = Model.collection.initializeUnorderedBulkOp();
        nQueued = 0;

        const { nInserted, failed } = await this.execUnorderedBulkOpSafely(bulkOp);
        nInsertedTotal += nInserted;
        failedIds.push(...failed);
      };

      const readStream = fs.createReadStream(jsonFile, { encoding: this.encoding });
      const jsonStream = readStream.pipe(JSONStream.parse('*'));

      await new Promise((resolve, reject) => {
        let isAborted = false;

        const abort = (err) => {
          isAborted = true;
          readStream.destroy();
          reject(err);
        };

        readStream.on('error', abort);
        jsonStream.on('error', abort);

        jsonStream.on('data', (document) => {
          if (isAborted) {
            return;
          }

          try {
            // documents are not persisted until the bulk op is executed
            unorderedBulkOp.insert(this.convertDocuments(Model, document, overwriteParams));
          }
          catch (err) {
            abort(err);
            return;
          }

          nQueued++;
          if (nQueued < this.per) {
            return;
          }

          // pause jsonStream to prevent more items from being queued while the batch is written
          jsonStream.pause();
          inFlight = flush();
          inFlight.then(() => jsonStream.resume(), abort);
        });

        jsonStream.on('end', () => {
          if (isAborted) {
            return;
          }

          // wait for a batch that may still be running, then insert the rest (if any)
          inFlight
            .then(() => (nQueued > 0 ? flush() : undefined))
            .then(resolve, reject);
        });
      });

      logger.info(`Done. Inserted ${nInsertedTotal} ${collectionName}.`);
      if (failedIds.length > 0) {
        logger.error(`Failed to insert ${failedIds.length} ${collectionName}: ${failedIds.join(', ')}.`);
      }

      // clean up tmp directory
      fs.unlinkSync(jsonFile);
    }

    /**
     * extract a zip file
     *
     * meta.json, directory entries and entries whose path would end up outside
     * of `baseDir` are skipped. Resolves after every extracted file has been fully written.
     *
     * @memberOf ImportService
     * @param {string} zipFile absolute path to zip file
     * @return {Array.<string>} array of absolute paths to extracted files
     */
    async unzip(zipFile) {
      const readStream = fs.createReadStream(zipFile);
      const unzipStream = readStream.pipe(unzipper.Parse());
      const files = [];
      const writePromises = [];

      unzipStream.on('entry', (entry) => {
        const fileName = entry.path;
        const jsonFile = path.join(this.baseDir, fileName);

        // never trust paths stored in an archive: "../" entries must not escape baseDir
        const isUnsafe = !this.isInsideBaseDir(jsonFile);
        if (isUnsafe) {
          logger.error(`Skipped "${fileName}": it points outside of the import directory.`);
        }

        if (isUnsafe || fileName === this.metaFileName || entry.type === 'Directory') {
          entry.autodrain();
          return;
        }

        const writeStream = fs.createWriteStream(jsonFile, { encoding: this.encoding });
        const written = new Promise((resolve, reject) => {
          writeStream.on('finish', resolve);
          writeStream.on('error', (err) => {
            // keep consuming the entry so that the zip parser is not blocked by it
            entry.resume();
            reject(err);
          });
        });
        // the rejection is reported by Promise.all below; this only avoids an unhandled rejection in the meantime
        written.catch(() => {});
        writePromises.push(written);

        entry.pipe(writeStream);
        files.push(jsonFile);
      });

      await streamToPromise(unzipStream);
      await Promise.all(writePromises);

      return files;
    }

    /**
     * execute unorderedBulkOp and ignore write errors of individual documents
     *
     * @memberOf ImportService
     * @param {object} unorderedBulkOp result of Model.collection.initializeUnorderedBulkOp()
     * @return {{nInserted: number, failed: Array.<string>}} number of documents inserted and ids of documents that failed
     * @throws the original error when it does not carry a bulk write result (nothing can be salvaged from it)
     */
    async execUnorderedBulkOpSafely(unorderedBulkOp) {
      // keep the number of documents inserted and failed for logger
      let nInserted = 0;
      const failed = [];

      // try catch to skip errors
      try {
        const log = await unorderedBulkOp.execute();
        nInserted = log.result.nInserted;
      }
      catch (err) {
        const bulkResult = (err != null && err.result != null) ? err.result.result : null;

        // only partial failures of the bulk write are tolerated; anything else is rethrown as is
        if (bulkResult == null || !Array.isArray(bulkResult.writeErrors)) {
          throw err;
        }

        for (const error of bulkResult.writeErrors) {
          logger.error(error.errmsg);
          failed.push(error.err.op._id);
        }
        nInserted = bulkResult.nInserted;
      }

      logger.debug(`Importing ${unorderedBulkOp.s.collection.s.name}. Inserted: ${nInserted}. Failed: ${failed.length}.`);

      return {
        nInserted,
        failed,
      };
    }

    /**
     * convert an imported document into the document to be persisted
     *
     * Only keys registered in the convert map of the collection are copied.
     * A converter / overwrite value may be a function `(value, { _document, key, schema }) => newValue`
     * or a plain value that is used as is.
     *
     * @memberOf ImportService
     * @param {object} Model instance of mongoose model
     * @param {object} _document document being imported
     * @param {object} overwriteParams overwrite each document with unrelated value. e.g. { creator: req.user }
     * @return {object} document to be persisted
     * @throws {Error} when no convert map exists for the collection of Model
     */
    convertDocuments(Model, _document, overwriteParams) {
      const collectionName = Model.collection.collectionName;
      const schema = Model.schema.paths;
      const convertMap = this.convertMap[collectionName];

      if (convertMap == null) {
        throw new Error(`attribute map is not defined for ${collectionName}`);
      }

      const applyTo = (key, value) => {
        return (typeof value === 'function') ? value(_document[key], { _document, key, schema }) : value;
      };

      const document = {};

      // assign value from documents being imported
      for (const [key, value] of Object.entries(convertMap)) {
        // distinguish between null and undefined: null is imported, a missing key is not
        if (_document[key] === undefined) {
          continue; // next entry
        }

        document[key] = applyTo(key, value);
      }

      // overwrite documents with custom values (only for keys present in the imported document)
      for (const [key, value] of Object.entries(overwriteParams)) {
        // distinguish between null and undefined
        if (_document[key] !== undefined) {
          document[key] = applyTo(key, value);
        }
      }

      return document;
    }

    /**
     * get a model from collection name
     *
     * @memberOf ImportService
     * @param {string} collectionName collection name
     * @return {object} instance of mongoose model
     * @throws {Error} when no model is registered for the collection name
     */
    getModelFromCollectionName(collectionName) {
      const Model = this.collectionMap[collectionName];

      if (Model == null) {
        throw new Error(`cannot find a model for collection name "${collectionName}"`);
      }

      return Model;
    }

    /**
     * check whether a path points to something inside of baseDir
     *
     * @memberOf ImportService
     * @param {string} filePath path to check
     * @return {boolean} true when filePath is located below baseDir (baseDir itself does not count)
     */
    isInsideBaseDir(filePath) {
      const relative = path.relative(path.resolve(this.baseDir), path.resolve(filePath));

      if (relative === '' || path.isAbsolute(relative)) {
        return false;
      }

      return relative !== '..' && !relative.startsWith(`..${path.sep}`);
    }

    /**
     * get the absolute path to a file
     *
     * @memberOf ImportService
     * @param {string} fileName base name of file
     * @return {string} absolute path to the file
     * @throws {Error} when fileName points outside of baseDir or the file is not accessible
     */
    getFile(fileName) {
      const jsonFile = path.join(this.baseDir, fileName);

      if (!this.isInsideBaseDir(jsonFile)) {
        throw new Error(`invalid file name "${fileName}"`);
      }

      // throws err if the file does not exist
      fs.accessSync(jsonFile);

      return jsonFile;
    }

    /**
     * validate using meta.json
     * to pass validation, all the criteria must be met
     * - ${version of this growi} === ${version of growi that exported data}
     *
     * @memberOf ImportService
     * @param {object} meta meta data from meta.json
     * @throws {Error} when the versions do not match
     */
    validate(meta) {
      if (meta.version !== this.crowi.version) {
        throw new Error('the version of this growi and the growi that exported the data are not met');
      }

      // TODO: check if all migrations are completed
      // - export: throw err if there are pending migrations
      // - import: throw err if there are pending migrations
    }

  }
  254. module.exports = ImportService;