Futa Arai 1 год назад
Родитель
Коммит
e1f8e1fe82

+ 1 - 0
apps/pdf-converter/.env

@@ -0,0 +1 @@
+PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium

+ 2 - 0
apps/pdf-converter/.eslintignore

@@ -0,0 +1,2 @@
+/dist/**
+/src/client-library/**

+ 7 - 0
apps/pdf-converter/.eslintrc.js

@@ -0,0 +1,7 @@
+// ESLint settings for the pdf-converter app; everything not overridden here
+// comes from the repository-root configuration referenced by `extends`.
+module.exports = {
+  extends: '../../.eslintrc.js',
+  rules: {
+    // The controller declares its dependency as a constructor parameter property
+    // with an empty constructor body, which this rule would otherwise report.
+    'no-useless-constructor': 'off',
+    // NOTE(review): presumably disabled because injected classes must remain value
+    // imports for decorator metadata to work — confirm.
+    '@typescript-eslint/consistent-type-imports': 'off',
+  },
+};

+ 3 - 0
apps/pdf-converter/.gitignore

@@ -0,0 +1,3 @@
+/dist
+/specs
+/src/client-library

+ 83 - 0
apps/pdf-converter/docker/Dockerfile

@@ -0,0 +1,83 @@
+########################################################################
+# deps-resolver
+########################################################################
+FROM node:20-slim AS deps-resolver
+
+WORKDIR /app
+
+COPY package.json yarn.lock ./
+
+RUN yarn --frozen-lockfile
+
+RUN tar -cf node_modules.tar \
+  node_modules \
+  && rm -rf node_modules
+
+
+########################################################################
+# deps-resolver-prod
+########################################################################
+FROM deps-resolver AS deps-resolver-prod
+
+WORKDIR /app
+
+RUN yarn --production
+
+RUN tar -cf node_modules.tar \
+  node_modules \
+  && rm -rf node_modules
+
+
+########################################################################
+# builder
+########################################################################
+FROM node:20-slim AS builder
+
+WORKDIR /app
+
+COPY package.json yarn.lock tsconfig.json tsconfig.build.json .eslintrc.js ./
+COPY src ./src
+COPY --from=deps-resolver /app/node_modules.tar ./
+
+RUN tar -xf node_modules.tar \
+  && rm node_modules.tar \
+  && yarn build \
+  && tar -cf packages.tar \
+    package.json \
+    yarn.lock \
+    dist \
+  && rm -rf node_modules
+
+
+########################################################################
+# production
+########################################################################
+FROM node:20-slim
+
+ENV NODE_ENV=production
+ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
+ENV LANG=ja_JP.UTF-8
+
+RUN apt-get update && apt-get install -y tini chromium locales fonts-ipafont fonts-ipaexfont fonts-ipafont-gothic fonts-ipafont-mincho && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    && echo "ja_JP UTF-8" > /etc/locale.gen && locale-gen
+
+USER node
+WORKDIR /app
+
+COPY --from=deps-resolver-prod --chown=node:node \
+  /app/node_modules.tar ./
+COPY --from=builder --chown=node:node \
+  /app/packages.tar ./
+
+RUN tar -xf node_modules.tar \
+  && tar -xf packages.tar \
+  && rm node_modules.tar packages.tar
+
+# change permission for shared volume
+RUN mkdir -p /tmp/page-bulk-export && chmod -R 777 /tmp/page-bulk-export
+
+EXPOSE 3004
+
+ENTRYPOINT ["/usr/bin/tini", "-e", "143", "--"]
+CMD ["node", "dist/index.js"]

+ 6 - 0
apps/pdf-converter/orval.config.js

@@ -0,0 +1,6 @@
+// orval configuration: reads the swagger spec found under ./specs and writes the
+// generated TypeScript client to src/client-library/index.ts.
+module.exports = {
+  'client-library': {
+    input: './specs/v3/docs/swagger.yaml',
+    output: './src/client-library/index.ts',
+  },
+};

+ 38 - 0
apps/pdf-converter/package.json

@@ -0,0 +1,38 @@
+{
+  "name": "@growi/pdf-converter",
+  "version": "1.0.0",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "license": "MIT",
+  "private": true,
+  "scripts": {
+    "dev:pdf-converter": "nodemon --watch \"src/**/*.ts\" --ignore \"node_modules/**/*\" --exec ts-node -r \"dotenv-flow/config\" src/index.ts",
+    "lint": "yarn eslint **/*.{js,ts}",
+    "gen:client-code": "tsed run generate-swagger --output ./specs && orval",
+    "build": "yarn gen:client-code && tsc -p tsconfig.build.json"
+  },
+  "dependencies": {
+    "@tsed/cli": "^5.4.3",
+    "@tsed/cli-core": "^5.4.3",
+    "@tsed/cli-generate-swagger": "^5.4.3",
+    "@tsed/common": "7.83.4",
+    "@tsed/components-scan": "7.83.4",
+    "@tsed/core": "7.83.4",
+    "@tsed/di": "7.83.4",
+    "@tsed/exceptions": "7.83.4",
+    "@tsed/json-mapper": "7.83.4",
+    "@tsed/platform-express": "7.83.4",
+    "@tsed/schema": "7.83.4",
+    "@tsed/swagger": "7.83.4",
+    "express": "^4.19.2",
+    "puppeteer": "^23.1.1",
+    "puppeteer-cluster": "^0.24.0"
+  },
+  "devDependencies": {
+    "@types/connect": "^3.4.38",
+    "@types/express": "^4.17.21",
+    "@types/multer": "^1.4.12",
+    "@types/node": "^22.5.4",
+    "orval": "^7.1.1"
+  }
+}

+ 10 - 0
apps/pdf-converter/src/bin/index.ts

@@ -0,0 +1,10 @@
+#!/usr/bin/env node
+import { CliCore } from '@tsed/cli-core';
+import { GenerateSwaggerCmd } from '@tsed/cli-generate-swagger';
+
+import Server from '../server';
+
+// Bootstrap the Ts.ED CLI with this app's Server class and register the
+// swagger generation command as the only available command.
+CliCore.bootstrap({
+  server: Server,
+  commands: [GenerateSwaggerCmd],
+});

+ 1 - 0
apps/pdf-converter/src/controllers/index.ts

@@ -0,0 +1 @@
+export { default } from './pdf';

+ 51 - 0
apps/pdf-converter/src/controllers/pdf.ts

@@ -0,0 +1,51 @@
+import { BodyParams, Logger } from '@tsed/common';
+import { Controller, Inject } from '@tsed/di';
+import { InternalServerError } from '@tsed/exceptions';
+import {
+  Post, Returns, Enum, Description,
+} from '@tsed/schema';
+
+import PdfConvertService, { JobStatusSharedWithGrowi, JobStatus } from '../service/pdf-convert';
+
+/**
+ * Controller mounted at `/pdf` that lets GROWI synchronize the state of its
+ * pdf conversion jobs with this service.
+ */
+@Controller('/pdf')
+class PdfCtrl {
+
+  @Inject()
+    logger: Logger;
+
+  constructor(private readonly pdfConvertService: PdfConvertService) {}
+
+  /**
+   * Register or update a job and report the resulting job status.
+   * @param jobId id of the job to register or update
+   * @param expirationDateStr expiration date of the job, parsed with `new Date()`
+   * @param growiJobStatus job status as seen by GROWI
+   * @returns the status of the job inside pdf-converter after the update
+   * @throws InternalServerError when registering or updating the job fails
+   */
+  @Post('/sync-job')
+  @(Returns(202).ContentType('application/json').Schema({
+    type: 'object',
+    properties: {
+      status: { type: 'string', enum: Object.values(JobStatus) },
+    },
+    required: ['status'],
+  }))
+  @Returns(500)
+  @Description(`
+    Sync job pdf convert status with GROWI.
+    Register or update job inside pdf-converter with given jobId, expirationDate, and status.
+    Return resulting status of job to GROWI.
+  `)
+  async syncJobStatus(
+    @BodyParams('jobId') jobId: string,
+    @BodyParams('expirationDate') expirationDateStr: string,
+    @BodyParams('status') @Enum(Object.values(JobStatusSharedWithGrowi)) growiJobStatus: JobStatusSharedWithGrowi,
+  ): Promise<{ status: JobStatus }> {
+    const expirationDate = new Date(expirationDateStr);
+    try {
+      await this.pdfConvertService.registerOrUpdateJob(jobId, expirationDate, growiJobStatus);
+
+      // Read the status BEFORE cleaning up. cleanUpJobList() removes completed jobs and
+      // getJobStatus() answers FAILED for unknown ids, so the opposite order would report
+      // a job that has just reached PDF_EXPORT_DONE as FAILED.
+      const status = this.pdfConvertService.getJobStatus(jobId);
+      this.pdfConvertService.cleanUpJobList();
+
+      return { status };
+    }
+    catch (err) {
+      this.logger.error('Failed to register or update job', err);
+      throw new InternalServerError(err);
+    }
+  }
+
+}
+
+export default PdfCtrl;

+ 19 - 0
apps/pdf-converter/src/index.ts

@@ -0,0 +1,19 @@
+import { $log } from '@tsed/common';
+import { PlatformExpress } from '@tsed/platform-express';
+
+import Server from './server';
+
+/**
+ * Boot the Ts.ED Express platform with the Server configuration and start listening.
+ * A failure during startup is logged and not rethrown.
+ */
+async function bootstrap(): Promise<void> {
+  try {
+    $log.debug('Start server...');
+
+    const expressPlatform = await PlatformExpress.bootstrap(Server);
+    await expressPlatform.listen();
+
+    $log.debug('Server initialized');
+  }
+  catch (startupError) {
+    $log.error(startupError);
+  }
+}
+
+bootstrap();

+ 37 - 0
apps/pdf-converter/src/server.ts

@@ -0,0 +1,37 @@
+import { PlatformApplication } from '@tsed/common';
+import { Configuration, Inject } from '@tsed/di';
+import express from 'express';
+import '@tsed/swagger';
+
+import * as Controllers from './controllers';
+
+import '@tsed/platform-express';
+
+// Listening port: the PORT environment variable, or 3004 when it is unset or empty.
+const PORT = Number(process.env.PORT || 3004);
+
+// Maximum request body size accepted by the body parsers below.
+const BODY_SIZE_LIMIT = '50mb';
+
+/**
+ * Ts.ED server definition: mounts every controller exported from ./controllers at the
+ * root path, installs the body parsers, and serves the swagger document at /v3/docs.
+ */
+@Configuration({
+  port: PORT,
+  acceptMimes: ['application/json'],
+  mount: {
+    '/': Object.values(Controllers),
+  },
+  middlewares: [
+    'json-parser',
+    express.json({ limit: BODY_SIZE_LIMIT }),
+    express.urlencoded({ extended: true, limit: BODY_SIZE_LIMIT }),
+  ],
+  swagger: [
+    {
+      path: '/v3/docs',
+      specVersion: '3.0.1',
+    },
+  ],
+})
+class Server {
+
+  @Inject()
+    app: PlatformApplication;
+
+}
+
+export default Server;

+ 282 - 0
apps/pdf-converter/src/service/pdf-convert.ts

@@ -0,0 +1,282 @@
+import fs from 'fs';
+import path from 'path';
+import { Readable, Writable } from 'stream';
+import { pipeline as pipelinePromise } from 'stream/promises';
+
+import type { Logger } from '@tsed/common';
+import { Inject, Service } from '@tsed/di';
+import { Cluster } from 'puppeteer-cluster';
+
+/**
+ * Chunk passed from the html readable stream to the pdf writable stream:
+ * the content of one html file together with the path it was read from.
+ */
+interface PageInfo {
+  htmlString: string;
+  htmlFilePath: string;
+}
+
+/**
+ * Job statuses that callers may pass to PdfConvertService.registerOrUpdateJob().
+ */
+export const JobStatusSharedWithGrowi = {
+  HTML_EXPORT_IN_PROGRESS: 'HTML_EXPORT_IN_PROGRESS',
+  HTML_EXPORT_DONE: 'HTML_EXPORT_DONE',
+  FAILED: 'FAILED',
+} as const;
+
+/**
+ * Every status a job can have inside this service: the shared statuses plus
+ * PDF_EXPORT_DONE, which is only ever assigned by the service itself.
+ */
+export const JobStatus = {
+  ...JobStatusSharedWithGrowi,
+  PDF_EXPORT_DONE: 'PDF_EXPORT_DONE',
+} as const;
+
+// Union types of the string values of the objects above (same names, type namespace).
+export type JobStatusSharedWithGrowi = typeof JobStatusSharedWithGrowi[keyof typeof JobStatusSharedWithGrowi]
+export type JobStatus = typeof JobStatus[keyof typeof JobStatus]
+
+/**
+ * State kept for each registered job.
+ */
+interface JobInfo {
+  // jobs whose expiration date has passed are failed or removed from the job list
+  expirationDate: Date;
+  status: JobStatus;
+  // html readable stream of the conversion pass in progress, if any;
+  // it is destroyed when the job fails or is cleaned up
+  currentStream?: Readable;
+}
+
+/**
+ * Service that keeps track of pdf conversion jobs and converts the html files of each
+ * job, found under the shared tmp directory, into pdf files using a puppeteer cluster.
+ */
+@Service()
+class PdfConvertService {
+
+  private puppeteerCluster: Cluster | undefined;
+
+  private maxConcurrency = 1;
+
+  private convertRetryLimit = 5;
+
+  // wait time before each conversion pass of a job
+  private conversionIntervalMs = 60 * 1000;
+
+  private tmpOutputRootDir = '/tmp/page-bulk-export';
+
+  private tmpHtmlDir = `${this.tmpOutputRootDir}/html`;
+
+  // Prototype-less object: job ids come from request bodies, so ids such as "toString"
+  // or "__proto__" must not resolve to inherited Object.prototype members.
+  private jobList: {
+    [key: string]: JobInfo;
+  } = Object.create(null);
+
+  @Inject()
+    logger: Logger;
+
+  /**
+   * Register or update job inside jobList with given jobId, expirationDate, and status.
+   * If job is new, start reading html files and convert them to pdf.
+   * @param jobId id of PageBulkExportJob
+   * @param expirationDate expiration date of job
+   * @param status status of job
+   */
+  async registerOrUpdateJob(jobId: string, expirationDate: Date, status: JobStatusSharedWithGrowi): Promise<void> {
+    if (this.puppeteerCluster == null) await this.initPuppeteerCluster();
+
+    const isJobNew = !(jobId in this.jobList);
+
+    if (isJobNew) {
+      this.jobList[jobId] = { expirationDate, status };
+    }
+    else {
+      const jobInfo = this.jobList[jobId];
+      jobInfo.expirationDate = expirationDate;
+
+      // a completed job keeps its final status
+      if (!this.isJobCompleted(jobId)) {
+        jobInfo.status = status;
+      }
+    }
+
+    if (status === JobStatus.FAILED) {
+      this.jobList[jobId].currentStream?.destroy(new Error('job failed'));
+    }
+
+    if (isJobNew && status !== JobStatus.FAILED) {
+      // Deliberately not awaited: the conversion keeps running in the background.
+      // The rejection handler guarantees that an unexpected error is logged
+      // and never surfaces as an unhandled promise rejection.
+      this.readHtmlAndConvertToPdfUntilFinish(jobId).catch((err) => {
+        this.logger.error('Failed to convert html to pdf', err);
+      });
+    }
+  }
+
+  /**
+   * Get job status
+   * @param jobId id of PageBulkExportJob
+   * @returns job status (FAILED when the job is not registered)
+   */
+  getJobStatus(jobId: string): JobStatus {
+    if (!(jobId in this.jobList)) return JobStatus.FAILED;
+    return this.jobList[jobId].status;
+  }
+
+  /**
+   * Clean up job list by removing expired jobs, finished jobs, and failed jobs
+   */
+  cleanUpJobList(): void {
+    const now = new Date();
+    for (const jobId of Object.keys(this.jobList)) {
+      const job = this.jobList[jobId];
+      if (now > job.expirationDate || this.isJobCompleted(jobId)) {
+        job.currentStream?.destroy(new Error('job expired'));
+        delete this.jobList[jobId];
+      }
+    }
+  }
+
+  /**
+   * Check whether a job needs no further processing
+   * @param jobId id of PageBulkExportJob
+   * @returns true when the job is done, failed, or no longer registered
+   */
+  private isJobCompleted(jobId: string): boolean {
+    const job = this.jobList[jobId];
+    // a job that was already removed by cleanUpJobList() has nothing left to process
+    if (job == null) return true;
+    return job.status === JobStatus.PDF_EXPORT_DONE || job.status === JobStatus.FAILED;
+  }
+
+
+  /**
+   * Read html files from shared fs path, convert them to pdf, and save them to shared fs path.
+   * Repeat this until all html files are converted to pdf or job fails.
+   * @param jobId id of PageBulkExportJob
+   */
+  private async readHtmlAndConvertToPdfUntilFinish(jobId: string): Promise<void> {
+    // Keep a reference to the job object: cleanUpJobList() may delete the entry (and a later
+    // request may register a new one under the same id) while this loop is sleeping or converting.
+    const job = this.jobList[jobId];
+    if (job == null) return;
+
+    while (this.jobList[jobId] === job && !this.isJobCompleted(jobId)) {
+      // eslint-disable-next-line no-await-in-loop
+      await new Promise(resolve => setTimeout(resolve, this.conversionIntervalMs));
+
+      // the job was removed or replaced while waiting
+      if (this.jobList[jobId] !== job) return;
+
+      try {
+        if (new Date() > job.expirationDate) {
+          throw new Error('Job expired');
+        }
+
+        const htmlReadable = this.getHtmlReadable(jobId);
+        const pdfWritable = this.getPdfWritable();
+        job.currentStream = htmlReadable;
+
+        // eslint-disable-next-line no-await-in-loop
+        await pipelinePromise(htmlReadable, pdfWritable);
+        job.currentStream = undefined;
+      }
+      catch (err) {
+        this.logger.error('Failed to convert html to pdf', err);
+        job.status = JobStatus.FAILED;
+        job.currentStream?.destroy(new Error('Failed to convert html to pdf'));
+        break;
+      }
+    }
+  }
+
+  /**
+   * Get readable stream that reads html files from shared fs path
+   * @param jobId id of PageBulkExportJob
+   * @returns readable stream
+   */
+  private getHtmlReadable(jobId: string): Readable {
+    // NOTE(review): jobId is joined into a filesystem path as-is; callers are assumed to be trusted.
+    const htmlFileEntries = fs.readdirSync(path.join(this.tmpHtmlDir, jobId), { recursive: true, withFileTypes: true }).filter(entry => entry.isFile());
+    let index = 0;
+
+    const jobList = this.jobList;
+
+    return new Readable({
+      objectMode: true,
+      async read() {
+        try {
+          if (index >= htmlFileEntries.length) {
+            const job = jobList[jobId];
+            // No html file left while GROWI has finished exporting html: the whole job is done.
+            if (job != null && job.status === JobStatus.HTML_EXPORT_DONE && htmlFileEntries.length === 0) {
+              job.status = JobStatus.PDF_EXPORT_DONE;
+            }
+            this.push(null);
+            return;
+          }
+
+          const entry = htmlFileEntries[index];
+          // advance before awaiting so that a re-entrant read() can never pick the same file
+          index += 1;
+
+          const htmlFilePath = path.join(entry.parentPath, entry.name);
+          const htmlString = await fs.promises.readFile(htmlFilePath, 'utf-8');
+
+          this.push({ htmlString, htmlFilePath });
+        }
+        catch (err) {
+          // A rejection of an async read() is not observed by the stream itself;
+          // destroy the stream so that the pipeline fails instead.
+          this.destroy(err);
+        }
+      },
+    });
+  }
+
+  /**
+   * Get writable stream that converts html to pdf, and save it to shared fs path
+   * @returns writable stream
+   */
+  private getPdfWritable(): Writable {
+    return new Writable({
+      objectMode: true,
+      write: async(pageInfo: PageInfo, encoding, callback) => {
+        // Map <tmpHtmlDir>/... to <tmpOutputRootDir>/... with a plain prefix check
+        // (no RegExp built from a path) and switch the extension to .pdf.
+        const { htmlFilePath } = pageInfo;
+        const relocatedPath = htmlFilePath.startsWith(this.tmpHtmlDir)
+          ? `${this.tmpOutputRootDir}${htmlFilePath.slice(this.tmpHtmlDir.length)}`
+          : htmlFilePath;
+        const fileOutputPath = relocatedPath.replace(/\.html$/, '.pdf');
+        const fileOutputParentPath = this.getParentPath(fileOutputPath);
+
+        try {
+          const pdfBody = await this.convertHtmlToPdf(pageInfo.htmlString);
+          await fs.promises.mkdir(fileOutputParentPath, { recursive: true });
+          await fs.promises.writeFile(fileOutputPath, pdfBody);
+
+          // the html file is no longer needed once its pdf has been written
+          await fs.promises.rm(pageInfo.htmlFilePath, { force: true });
+        }
+        catch (err) {
+          callback(err);
+          return;
+        }
+        callback();
+      },
+    });
+  }
+
+  /**
+   * Convert html to pdf. Retry up to convertRetryLimit if failed.
+   * @param htmlString html to convert to pdf
+   * @returns converted pdf
+   * @throws when the puppeteer cluster is not initialized or every attempt failed
+   */
+  private async convertHtmlToPdf(htmlString: string): Promise<Buffer> {
+    const cluster = this.puppeteerCluster;
+    if (cluster == null) {
+      throw new Error('Puppeteer cluster is not initialized');
+    }
+
+    const executeConvert = async(retries: number) => {
+      try {
+        // `await` is required: returning the bare promise would let a rejection
+        // bypass this try/catch, and the conversion would never be retried.
+        return await cluster.execute(htmlString);
+      }
+      catch (err) {
+        if (retries > 0) {
+          this.logger.error('Failed to convert html to pdf. Retrying...', err);
+          return executeConvert(retries - 1);
+        }
+        throw err;
+      }
+    };
+
+    const result = await executeConvert(this.convertRetryLimit);
+
+    return result;
+  }
+
+  /**
+   * Initialize puppeteer cluster
+   */
+  private async initPuppeteerCluster(): Promise<void> {
+    this.puppeteerCluster = await Cluster.launch({
+      concurrency: Cluster.CONCURRENCY_PAGE,
+      maxConcurrency: this.maxConcurrency,
+      workerCreationDelay: 10000,
+    });
+
+    // task executed for every html string passed to cluster.execute()
+    await this.puppeteerCluster.task(async({ page, data: htmlString }) => {
+      await page.setContent(htmlString, { waitUntil: 'domcontentloaded' });
+      await page.emulateMediaType('screen');
+      const pdfResult = await page.pdf({
+        margin: {
+          top: '100px', right: '50px', bottom: '100px', left: '50px',
+        },
+        printBackground: true,
+        format: 'A4',
+      });
+      return pdfResult;
+    });
+
+    // close cluster on app termination
+    const handleClose = async() => {
+      this.logger.info('Closing puppeteer cluster...');
+      await this.puppeteerCluster?.idle();
+      await this.puppeteerCluster?.close();
+      process.exit();
+    };
+    process.on('SIGINT', handleClose);
+    process.on('SIGTERM', handleClose);
+  }
+
+  /**
+   * Get parent path from given path
+   * @param targetPath target path ('/' separated)
+   * @returns parent path ('/' when the target has no parent directory)
+   */
+  private getParentPath(targetPath: string): string {
+    const parentPath = targetPath.split('/').slice(0, -1).join('/');
+    if (parentPath === '' || parentPath === '/') {
+      return '/';
+    }
+    return parentPath;
+  }
+
+}
+
+export default PdfConvertService;

+ 4 - 0
apps/pdf-converter/tsconfig.build.json

@@ -0,0 +1,4 @@
+{
+  "extends": "./tsconfig.json",
+  "exclude": ["node_modules", "dist", "test"]
+}

+ 19 - 0
apps/pdf-converter/tsconfig.json

@@ -0,0 +1,19 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "baseUrl": ".",
+    "outDir": "dist",
+    "declaration": true,
+    "target": "ES2020",
+    "lib": ["ES2020", "dom"],
+    "module": "commonjs",
+    "moduleResolution": "node",
+    "rootDir": "src",
+    "esModuleInterop": true,
+    "experimentalDecorators": true,
+    "emitDecoratorMetadata": true,
+    "strict": false
+  },
+  "include": ["./src/**/*", "./test/**/*"],
+  "exclude": ["node_modules", "dist"]
+}

Разница между файлами не показана из-за своего большого размера
+ 770 - 1
yarn.lock


Некоторые файлы не были показаны из-за большого количества измененных файлов