Browse Source

Merge branch 'feat/opentelemetry' into support/new-config-manager

Yuki Takei 1 year ago
parent
commit
eac938c65e

+ 10 - 0
.devcontainer/compose.yml

@@ -8,6 +8,10 @@ services:
       - buildcache_app:/workspace/growi/apps/app/.next
       - ../../growi-docker-compose:/workspace/growi-docker-compose:delegated
     tty: true
+    networks:
+    - default
+    # To be set up as part of https://redmine.weseek.co.jp/issues/144306
+    # - opentelemetry-collector-dev-setup_default
 
   mongo:
     image: mongo:6.0
@@ -44,3 +48,9 @@ volumes:
   pnpm-store:
   node_modules:
   buildcache_app:
+
+networks:
+  default:
+  # To be set up as part of https://redmine.weseek.co.jp/issues/144306
+  # opentelemetry-collector-dev-setup_default:
+  #   external: true

+ 2 - 1
.vscode/settings.json

@@ -25,6 +25,7 @@
   "typescript.enablePromptUseWorkspaceTsdk": true,
   "typescript.preferences.autoImportFileExcludePatterns": ["node_modules/*"],
   "typescript.validate.enable": true,
-  "typescript.surveys.enabled": false
+  "typescript.surveys.enabled": false,
 
+  "vitest.filesWatcherInclude": "**/*"
 }

+ 2 - 0
apps/app/.env.development

@@ -4,6 +4,8 @@
 ##
 MIGRATIONS_DIR=src/migrations/
 
+OPENTELEMETRY_ENABLED=false
+
 APP_SITE_URL=http://localhost:3000
 FILE_UPLOAD=mongodb
 # MONGO_GRIDFS_TOTAL_LIMIT=10485760

+ 9 - 0
apps/app/package.json

@@ -84,6 +84,15 @@
     "@growi/remark-lsx": "workspace:^",
     "@growi/slack": "workspace:^",
     "@keycloak/keycloak-admin-client": "^18.0.0",
+    "@opentelemetry/api": "^1.8.0",
+    "@opentelemetry/auto-instrumentations-node": "^0.52.1",
+    "@opentelemetry/exporter-metrics-otlp-grpc": "^0.54.2",
+    "@opentelemetry/exporter-trace-otlp-grpc": "^0.54.2",
+    "@opentelemetry/resources": "^1.27.0",
+    "@opentelemetry/semantic-conventions": "^1.27.0",
+    "@opentelemetry/sdk-metrics": "^1.27.0",
+    "@opentelemetry/sdk-node": "^0.54.2",
+    "@opentelemetry/sdk-trace-node": "^1.27.0",
     "@slack/web-api": "^6.2.4",
     "@slack/webhook": "^6.0.0",
     "JSONStream": "^1.3.5",

+ 1 - 0
apps/app/src/features/opentelemetry/server/index.ts

@@ -0,0 +1 @@
+export * from './start';

+ 76 - 0
apps/app/src/features/opentelemetry/server/logger.ts

@@ -0,0 +1,76 @@
+import { diag, type DiagLogger } from '@opentelemetry/api';
+
+import loggerFactory from '~/utils/logger';
+
+const logger = loggerFactory('growi:opentelemetry:diag');
+
+
+/**
+ * Bridges OpenTelemetry's diagnostic channel (`diag`) to the application logger
+ * created by `loggerFactory('growi:opentelemetry:diag')`.
+ *
+ * Every diag call is normalized by `parseMessage()` into a `[message, data]` pair
+ * and forwarded to the logger method of the matching level
+ * (`verbose` is forwarded to `trace`).
+ *
+ * NOTE(review): bunyan-style loggers usually take the fields object first —
+ * confirm that the logger from `~/utils/logger` accepts `(message, data)`.
+ */
+class DiagLoggerBunyanAdapter implements DiagLogger {
+
+  /**
+   * Type guard for plain key/value containers (rejects `null` and arrays).
+   */
+  private isRecord(value: unknown): value is Record<string, unknown> {
+    return typeof value === 'object' && value !== null && !Array.isArray(value);
+  }
+
+  /**
+   * Parse `text` as JSON and return the result only when it is a key/value object.
+   *
+   * @returns the parsed object, or `undefined` for non-JSON text, primitives, arrays and `null`
+   */
+  private parseJsonRecord(text: string): Record<string, unknown> | undefined {
+    try {
+      const parsed: unknown = JSON.parse(text);
+      return this.isRecord(parsed) ? parsed : undefined;
+    }
+    catch (e) {
+      // not a JSON string
+      return undefined;
+    }
+  }
+
+  /**
+   * Normalize a diag call into a log message and a structured data object.
+   *
+   * - When `message` is a JSON object string, the object becomes the base data and
+   *   its string `message` property (if any) becomes the log message.
+   * - Object args and JSON-object string args are merged into the data in order (later wins).
+   * - `Error` args are stored under `error` with name/message/stack, which a plain
+   *   spread would drop because they are not enumerable.
+   * - Everything else (plain strings, JSON primitives/arrays, numbers, ...) is kept
+   *   under `additionalInfo`: the value itself when there is one, an array when there are several.
+   *
+   * @param message the raw diag message
+   * @param args the extra arguments passed to the diag function
+   * @returns tuple of the log message and the merged data
+   */
+  private parseMessage(message: string, args: unknown[]): [logMessage: string, data: object] {
+    let logMessage = message;
+    let data: Record<string, unknown> = {};
+
+    // check whether the message is a JSON object string
+    const parsedMessage = this.parseJsonRecord(message);
+    if (parsedMessage != null) {
+      data = parsedMessage;
+      // if parsed successfully, use 'message' property as log message
+      if (typeof parsedMessage.message === 'string') {
+        logMessage = parsedMessage.message;
+      }
+    }
+
+    // merge additional data
+    const extras: unknown[] = [];
+    for (const arg of args) {
+      if (arg instanceof Error) {
+        data = {
+          ...data,
+          error: {
+            ...arg, name: arg.name, message: arg.message, stack: arg.stack,
+          },
+        };
+      }
+      else if (typeof arg === 'string') {
+        const parsedArg = this.parseJsonRecord(arg);
+        if (parsedArg != null) {
+          data = { ...data, ...parsedArg };
+        }
+        else {
+          extras.push(arg);
+        }
+      }
+      else if (this.isRecord(arg)) {
+        data = { ...data, ...arg };
+      }
+      else if (arg != null) {
+        extras.push(arg);
+      }
+    }
+
+    if (extras.length > 0) {
+      data = { ...data, additionalInfo: extras.length === 1 ? extras[0] : extras };
+    }
+
+    return [logMessage, data];
+  }
+
+  error(message: string, ...args: unknown[]): void {
+    logger.error(...this.parseMessage(message, args));
+  }
+
+  warn(message: string, ...args: unknown[]): void {
+    logger.warn(...this.parseMessage(message, args));
+  }
+
+  info(message: string, ...args: unknown[]): void {
+    logger.info(...this.parseMessage(message, args));
+  }
+
+  debug(message: string, ...args: unknown[]): void {
+    logger.debug(...this.parseMessage(message, args));
+  }
+
+  // the application logger's lowest level used here is 'trace'
+  verbose(message: string, ...args: unknown[]): void {
+    logger.trace(...this.parseMessage(message, args));
+  }
+
+}
+
+
+/**
+ * Register a DiagLoggerBunyanAdapter as the global diagnostic logger of OpenTelemetry.
+ */
+export const initLogger = (): void => {
+  const adapter = new DiagLoggerBunyanAdapter();
+  diag.setLogger(adapter);
+};

+ 47 - 0
apps/app/src/features/opentelemetry/server/node-sdk-configuration.ts

@@ -0,0 +1,47 @@
+import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
+import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc';
+import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc';
+import { Resource } from '@opentelemetry/resources';
+import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics';
+import type { NodeSDKConfiguration } from '@opentelemetry/sdk-node';
+import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, SEMRESATTRS_SERVICE_INSTANCE_ID } from '@opentelemetry/semantic-conventions';
+
+
+/**
+ * Build the option object that is handed to the `NodeSDK` constructor.
+ *
+ * @param instanceId value reported as the service instance id resource attribute
+ * @param version value reported as the service version resource attribute
+ * @returns resource, OTLP trace exporter, periodic OTLP metric reader and auto-instrumentations
+ */
+export const generateNodeSDKConfiguration = (instanceId: string, version: string): Partial<NodeSDKConfiguration> => {
+  // attributes identifying this service in the exported data
+  const resource = new Resource({
+    [ATTR_SERVICE_NAME]: 'growi',
+    [ATTR_SERVICE_VERSION]: version,
+    [SEMRESATTRS_SERVICE_INSTANCE_ID]: instanceId,
+  });
+
+  const traceExporter = new OTLPTraceExporter();
+
+  // metrics are exported every 10000 ms
+  const metricReader = new PeriodicExportingMetricReader({
+    exporter: new OTLPMetricExporter(),
+    exportIntervalMillis: 10000,
+  });
+
+  const autoInstrumentations = getNodeAutoInstrumentations({
+    '@opentelemetry/instrumentation-bunyan': {
+      enabled: false,
+    },
+    // disable fs instrumentation since this generates very large amount of traces
+    // see: https://opentelemetry.io/docs/languages/js/libraries/#registration
+    '@opentelemetry/instrumentation-fs': {
+      enabled: false,
+    },
+  });
+
+  return {
+    resource,
+    traceExporter,
+    metricReader,
+    instrumentations: [autoInstrumentations],
+  };
+};
+
+// public async shutdownInstrumentation(): Promise<void> {
+//   await this.sdkInstance.shutdown();
+
+//   // Note: the restart code below does not work
+//   // neither spans nor metrics are emitted any more after it runs
+//   // It seems the SDK is not meant to be restarted in the first place?
+//   // see: https://github.com/open-telemetry/opentelemetry-specification/issues/27/
+//   // const sdk = new NodeSDK({...});
+//   // sdk.start();
+//   // await sdk.shutdown().catch(console.error);
+//   // const newSdk = new NodeSDK({...});
+//   // newSdk.start();
+// }

+ 87 - 0
apps/app/src/features/opentelemetry/server/start.ts

@@ -0,0 +1,87 @@
+import type { NodeSDK } from '@opentelemetry/sdk-node';
+
+import { configManager } from '~/server/service/config-manager';
+import loggerFactory from '~/utils/logger';
+
+
+const logger = loggerFactory('growi:opentelemetry:server');
+
+
+/**
+ * The NodeSDK instance of this process.
+ * Stays undefined until startInstrumentation() assigns one, hence the explicit `| undefined`.
+ */
+let sdkInstance: NodeSDK | undefined;
+
+/**
+ * Overwrite "OTEL_SDK_DISABLED" env var before sdk.start() is invoked if needed.
+ * Since otel library sees it.
+ *
+ * Only an explicitly conflicting value is overwritten, and a warning is logged when it happens:
+ * - 'otel:enabled' is truthy and the env var is 'true' / '1'  -> set to 'false'
+ * - 'otel:enabled' is falsy and the env var is 'false' / '0' -> set to 'true'
+ * An unset or unrecognized value is left untouched.
+ *
+ * The env var is compared case-insensitively and without surrounding whitespace,
+ * so that values such as 'TRUE' or 'True' are not missed.
+ */
+function overwriteSdkDisabled(): void {
+  const instrumentationEnabled = configManager.getConfig('crowi', 'otel:enabled');
+  const sdkDisabled = process.env.OTEL_SDK_DISABLED?.trim().toLowerCase();
+
+  if (instrumentationEnabled) {
+    if (sdkDisabled === 'true' || sdkDisabled === '1') {
+      logger.warn("OTEL_SDK_DISABLED overwritten with 'false' since GROWI's 'otel:enabled' config is true.");
+      process.env.OTEL_SDK_DISABLED = 'false';
+    }
+    return;
+  }
+
+  if (sdkDisabled === 'false' || sdkDisabled === '0') {
+    logger.warn("OTEL_SDK_DISABLED is overwritten with 'true' since GROWI's 'otel:enabled' config is false.");
+    process.env.OTEL_SDK_DISABLED = 'true';
+  }
+}
+
+/**
+ * Start OpenTelemetry instrumentation when GROWI's 'otel:enabled' config is truthy.
+ *
+ * - Only logs a warning when an SDK instance has already been started.
+ * - The otel modules are loaded with dynamic imports, only when instrumentation is enabled.
+ * - In development (NODE_ENV === 'development') the global otel diag logger is initialized first.
+ * - Failures are logged instead of being propagated, so that a caller which does not
+ *   await the returned promise cannot trigger an unhandled rejection.
+ *
+ * @param version GROWI version, reported as the service version
+ */
+export const startInstrumentation = async(version: string): Promise<void> => {
+  if (sdkInstance != null) {
+    logger.warn('OpenTelemetry instrumentation already started');
+    return;
+  }
+
+  try {
+    overwriteSdkDisabled();
+
+    const instrumentationEnabled = configManager.getConfig('crowi', 'otel:enabled');
+    if (!instrumentationEnabled) {
+      return;
+    }
+
+    logger.info(`GROWI now collects anonymous telemetry.
+
+This data is used to help improve GROWI, but you can opt-out at any time.
+
+For more information, see https://docs.growi.org/en/admin-guide/telemetry.html.
+`);
+
+    // initialize global logger for development
+    const isDev = process.env.NODE_ENV === 'development';
+    if (isDev) {
+      const { initLogger } = await import('./logger');
+      initLogger();
+    }
+
+    // instantiate NodeSDK
+    const { NodeSDK } = await import('@opentelemetry/sdk-node');
+    const { generateNodeSDKConfiguration } = await import('./node-sdk-configuration');
+
+    const serviceInstanceId = configManager.getConfig('crowi', 'otel:serviceInstanceId');
+
+    const sdk = new NodeSDK(generateNodeSDKConfiguration(serviceInstanceId, version));
+    sdk.start();
+
+    // remember the instance only after it has started successfully
+    sdkInstance = sdk;
+  }
+  catch (err) {
+    logger.error('Failed to start OpenTelemetry instrumentation', err);
+  }
+};
+
+// public async shutdownInstrumentation(): Promise<void> {
+//   await this.sdkInstance.shutdown();
+
+//   // Note: the restart code below does not work
+//   // neither spans nor metrics are emitted any more after it runs
+//   // It seems the SDK is not meant to be restarted in the first place?
+//   // see: https://github.com/open-telemetry/opentelemetry-specification/issues/27/
+//   // const sdk = new NodeSDK({...});
+//   // sdk.start();
+//   // await sdk.shutdown().catch(console.error);
+//   // const newSdk = new NodeSDK({...});
+//   // newSdk.start();
+// }

+ 4 - 0
apps/app/src/server/crowi/index.js

@@ -13,6 +13,7 @@ import pkg from '^/package.json';
 import { KeycloakUserGroupSyncService } from '~/features/external-user-group/server/service/keycloak-user-group-sync';
 import { LdapUserGroupSyncService } from '~/features/external-user-group/server/service/ldap-user-group-sync';
 import { startCronIfEnabled as startOpenaiCronIfEnabled } from '~/features/openai/server/services/cron';
+import { startInstrumentation } from '~/features/opentelemetry/server';
 import QuestionnaireService from '~/features/questionnaire/server/service/questionnaire';
 import QuestionnaireCronService from '~/features/questionnaire/server/service/questionnaire-cron';
 import loggerFactory from '~/utils/logger';
@@ -144,6 +145,9 @@ Crowi.prototype.init = async function() {
   await this.setupSessionConfig();
   this.setupCron();
 
+  // start OpenTelemetry
+  startInstrumentation(this.version);
+
   // setup messaging services
   await this.setupS2sMessagingService();
   await this.setupSocketIoService();

+ 20 - 0
apps/app/src/server/service/config-loader.ts

@@ -751,6 +751,26 @@ const ENV_VAR_NAME_TO_CONFIG_INFO: Record<string, EnvConfig> = {
     type: ValueType.NUMBER,
     default: 172800, // 2 days
   },
+  OPENTELEMETRY_ENABLED: {
+    ns: 'crowi',
+    key: 'otel:enabled',
+    type: ValueType.BOOLEAN,
+    default: true,
+  },
+  OPENTELEMETRY_IS_APP_SITE_URL_HASHED: {
+    ns: 'crowi',
+    key: 'otel:isAppSiteUrlHashed',
+    type: ValueType.BOOLEAN,
+    default: false,
+  },
+  // TODO: fix after the decision of the instrumentation data specification
+  // https://redmine.weseek.co.jp/issues/144351
+  OPENTELEMETRY_SERVICE_INSTANCE_ID: {
+    ns: 'crowi',
+    key: 'otel:serviceInstanceId',
+    type: ValueType.STRING,
+    default: null,
+  },
   AI_ENABLED: {
     ns: 'crowi',
     key: 'app:aiEnabled',

File diff suppressed because it is too large
+ 1264 - 10
pnpm-lock.yaml


Some files were not shown because too many files changed in this diff