import.js 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. const logger = require('@alias/logger')('growi:services:ImportService'); // eslint-disable-line no-unused-vars
  2. const fs = require('fs');
  3. const path = require('path');
  4. const JSONStream = require('JSONStream');
  5. const streamToPromise = require('stream-to-promise');
  6. const unzip = require('unzipper');
  7. const { ObjectId } = require('mongoose').Types;
  /**
   * Imports collections from a zipped JSON file into MongoDB through mongoose models.
   *
   * A zip file is extracted into `baseDir`; the resulting `<collectionName>.json`
   * is parsed as a stream and inserted in unordered bulk operations of `per` documents.
   * Each imported document is converted attribute by attribute via `convertMap`.
   */
  class ImportService {

    /**
     * @param {object} crowi application instance; `tmpDir` and `models` are read from it
     */
    constructor(crowi) {
      this.baseDir = path.join(crowi.tmpDir, 'imports');
      this.encoding = 'utf-8';
      // number of documents inserted per bulk operation
      this.per = 100;

      // keepOriginal is stored as a value in convertMap, so it must be bound
      this.keepOriginal = this.keepOriginal.bind(this);

      // { pages: Page, users: User, ... }
      this.collectionMap = {};
      this.initCollectionMap(crowi.models);

      // { pages: { _id: ..., path: ..., ...}, users: { _id: ..., username: ..., }, ... }
      // hardcoded conversions are defined once in getConvertMap()
      this.convertMap = {
        pages: this.getConvertMap('pages'),
      };
      this.initConvertMap(crowi.models);
    }

    /**
     * initialize collection map
     *
     * @memberOf ImportService
     * @param {object} models from models/index.js
     */
    initCollectionMap(models) {
      for (const model of Object.values(models)) {
        this.collectionMap[model.collection.collectionName] = model;
      }
    }

    /**
     * initialize convert map. set keepOriginal as default
     *
     * @memberOf ImportService
     * @param {object} models from models/index.js
     */
    initConvertMap(models) {
      // by default, original value is used for imported documents
      for (const model of Object.values(models)) {
        const { collectionName } = model.collection;

        // temporary store for the conversions that were already registered
        const _convertMap = this.convertMap[collectionName];

        this.convertMap[collectionName] = {};
        for (const key of Object.keys(model.schema.paths)) {
          this.convertMap[collectionName][key] = this.keepOriginal;
        }

        // assign back the registered conversions so that they win over keepOriginal
        Object.assign(this.convertMap[collectionName], _convertMap);
      }
    }

    /**
     * keep original value
     * automatically convert ObjectId
     *
     * @memberOf ImportService
     * @param {any} _value value from imported document
     * @param {{ _document: object, schema: object, key: string }}
     * @return {any} new value for the document
     */
    keepOriginal(_value, { _document, schema, key }) {
      // the key may not be declared in the schema (e.g. when used through overwriteParams)
      const schemaType = schema[key];
      if (schemaType == null) {
        return _value;
      }

      // NOTE(review): 'ObjectId' is presumably what newer mongoose versions report; 'ObjectID' is the original check
      const isObjectIdPath = schemaType.instance === 'ObjectID' || schemaType.instance === 'ObjectId';
      if (isObjectIdPath && ObjectId.isValid(_value)) {
        // "new" is required when ObjectId is an ES class, and is harmless otherwise
        return new ObjectId(_value);
      }

      return _value;
    }

    /**
     * get the hardcoded conversions for a collection
     *
     * @memberOf ImportService
     * @param {string} collectionName collection name
     * @return {object} map of attribute name to a hardcoded value or a convert function (empty when nothing is defined)
     */
    getConvertMap(collectionName) {
      const convertMap = {};

      // each key accepts either function or hardcoded value
      // 1. to keep the same value => unlist the key
      // 2. to filter out an attribute, explicitly set it to undefined. e.g. "[key]: undefined"
      if (collectionName === 'pages') {
        Object.assign(convertMap, {
          status: 'published', // FIXME when importing users and user groups
          grant: 1, // FIXME when importing users and user groups
          grantedUsers: [], // FIXME when importing users and user groups
          grantedGroup: null, // FIXME when importing users and user groups
          liker: [], // FIXME when importing users
          seenUsers: [], // FIXME when importing users
          commentCount: 0, // FIXME when importing comments
          extended: {}, // FIXME when ?
          pageIdOnHackmd: undefined, // FIXME when importing hackmd?
          revisionHackmdSynced: undefined, // FIXME when importing hackmd?
          hasDraftOnHackmd: undefined, // FIXME when importing hackmd?
        });
      }

      return convertMap;
    }

    /**
     * import a collection from json
     *
     * The returned promise settles only after the last batch has been executed,
     * and rejects when reading, parsing, converting or inserting fails unexpectedly.
     *
     * @memberOf ImportService
     * @param {object} Model instance of mongoose model
     * @param {string} filePath path to zipped json
     * @param {object} overwriteParams overwrite each document with unrelated value. e.g. { creator: req.user }
     */
    async importFromZip(Model, filePath, overwriteParams = {}) {
      const { collectionName } = Model.collection;

      // extract zip file
      await this.unzip(filePath);

      const tmpJson = path.join(this.baseDir, `${collectionName}.json`);

      let nInsertedTotal = 0;
      const failedIds = [];
      // number of documents queued in unorderedBulkOp and not executed yet
      let pending = 0;
      let unorderedBulkOp = Model.collection.initializeUnorderedBulkOp();

      // execute the queued documents and accumulate the result
      const flush = async() => {
        const { nInserted, failed } = await this.execUnorderedBulkOpSafely(unorderedBulkOp);
        nInsertedTotal += nInserted;
        failedIds.push(...failed);
        pending = 0;
      };

      try {
        await new Promise((resolve, reject) => {
          const readStream = fs.createReadStream(tmpJson, { encoding: this.encoding });
          const jsonStream = readStream.pipe(JSONStream.parse('*'));

          let aborted = false;
          const abort = (err) => {
            if (aborted) {
              return;
            }
            aborted = true;
            // stop feeding the parser
            readStream.destroy();
            reject(err);
          };

          // pipe() does not forward errors, so listen on both streams
          readStream.on('error', abort);
          jsonStream.on('error', abort);

          jsonStream.on('data', async(document) => {
            if (aborted) {
              return;
            }

            try {
              // documents are not persisted until unorderedBulkOp.execute()
              unorderedBulkOp.insert(this.convertDocuments(Model, document, overwriteParams));
              pending++;

              if (pending >= this.per) {
                // pause jsonStream to prevent more items from being added to unorderedBulkOp
                jsonStream.pause();

                await flush();

                // reset initializeUnorderedBulkOp
                unorderedBulkOp = Model.collection.initializeUnorderedBulkOp();

                // resume jsonStream
                jsonStream.resume();
              }
            }
            catch (err) {
              abort(err);
            }
          });

          jsonStream.on('end', async() => {
            if (aborted) {
              return;
            }

            try {
              // insert the rest. an empty bulk operation must not be executed
              if (pending > 0) {
                await flush();
              }

              logger.info(`Done. Inserted ${nInsertedTotal} ${collectionName}.`);

              if (failedIds.length > 0) {
                logger.error(`Failed to insert ${failedIds.length} ${collectionName}: ${failedIds.join(', ')}.`);
              }

              resolve();
            }
            catch (err) {
              abort(err);
            }
          });
        });
      }
      finally {
        // clean up tmp directory, also when the import failed
        if (fs.existsSync(tmpJson)) {
          fs.unlinkSync(tmpJson);
        }
      }
    }

    /**
     * extract a zip file
     *
     * @memberOf ImportService
     * @param {string} zipFilePath path to zip file
     * @return {Promise<void>} resolved when the extraction stream is closed
     */
    unzip(zipFilePath) {
      return new Promise((resolve, reject) => {
        const readStream = fs.createReadStream(zipFilePath);
        const unzipStream = readStream.pipe(unzip.Extract({ path: this.baseDir }));

        // errors of the source are not forwarded by pipe(); without this
        // a missing or unreadable zip file would leave the promise pending
        readStream.on('error', (err) => {
          reject(err);
        });

        unzipStream.on('error', (err) => {
          reject(err);
        });

        unzipStream.on('close', () => {
          resolve();
        });
      });
    }

    /**
     * execute unorderedBulkOp and ignore write errors
     *
     * Errors that do not carry a bulk write result are rethrown as they are.
     *
     * @memberOf ImportService
     * @param {object} unorderedBulkOp result of Model.collection.initializeUnorderedBulkOp()
     * @return {{nInserted: number, failed: string[]}} number of documents inserted and ids of documents failed
     */
    async execUnorderedBulkOpSafely(unorderedBulkOp) {
      // keep the number of documents inserted and failed for logger
      let nInserted = 0;
      const failed = [];

      // try catch to skip write errors
      try {
        const log = await unorderedBulkOp.execute();
        nInserted = log.result.nInserted;
      }
      catch (err) {
        const bulkResult = (err != null && err.result != null) ? err.result.result : null;

        // not a bulk write error: there is nothing to skip, so do not hide it
        if (bulkResult == null || !Array.isArray(bulkResult.writeErrors)) {
          throw err;
        }

        for (const error of bulkResult.writeErrors) {
          logger.error(error.errmsg);
          failed.push(error.err.op._id);
        }
        nInserted = bulkResult.nInserted;
      }

      logger.debug(`Importing ${unorderedBulkOp.s.collection.s.name}. Inserted: ${nInserted}. Failed: ${failed.length}.`);

      return {
        nInserted,
        failed,
      };
    }

    /**
     * convert a document being imported to a document to be persisted
     *
     * Only the attributes that are defined in the imported document are processed,
     * both for the convert map and for overwriteParams.
     *
     * @memberOf ImportService
     * @param {object} Model instance of mongoose model
     * @param {object} _document document being imported
     * @param {object} overwriteParams overwrite each document with unrelated value. e.g. { creator: req.user }
     * @return {object} document to be persisted
     * @throws {Error} when no convert map is defined for the collection of the Model
     */
    convertDocuments(Model, _document, overwriteParams) {
      const collectionName = Model.collection.collectionName;
      const schema = Model.schema.paths;
      const convertMap = this.convertMap[collectionName];

      if (convertMap == null) {
        throw new Error(`attribute map is not defined for ${collectionName}`);
      }

      const document = {};

      // assign value from documents being imported
      for (const [key, value] of Object.entries(convertMap)) {
        // distinguish between null and undefined
        if (_document[key] === undefined) {
          continue; // next entry
        }

        document[key] = (typeof value === 'function') ? value(_document[key], { _document, key, schema }) : value;
      }

      // overwrite documents with custom values
      for (const [key, value] of Object.entries(overwriteParams)) {
        // distinguish between null and undefined
        if (_document[key] !== undefined) {
          document[key] = (typeof value === 'function') ? value(_document[key], { _document, key, schema }) : value;
        }
      }

      return document;
    }

    /**
     * get a model from collection name
     *
     * @memberOf ImportService
     * @param {string} collectionName collection name
     * @return {object} instance of mongoose model
     * @throws {Error} when no model is registered for the collection name
     */
    getModelFromCollectionName(collectionName) {
      const Model = this.collectionMap[collectionName];

      if (Model == null) {
        throw new Error(`cannot find a model for collection name "${collectionName}"`);
      }

      return Model;
    }

  }
  256. module.exports = ImportService;