Просмотр исходного кода

Merge pull request #8811 from weseek/imprv/switch-otel-enabled

imprv: OpenTelemetry settings
Yuki Takei 1 год назад
Родитель
Коммит
a2cbd63da2

+ 2 - 0
apps/app/.env.development

@@ -4,6 +4,8 @@
 ##
 ##
 MIGRATIONS_DIR=src/migrations/
 MIGRATIONS_DIR=src/migrations/
 
 
+OPENTELEMETRY_ENABLED=false
+
 APP_SITE_URL=http://localhost:3000
 APP_SITE_URL=http://localhost:3000
 FILE_UPLOAD=mongodb
 FILE_UPLOAD=mongodb
 # MONGO_GRIDFS_TOTAL_LIMIT=10485760
 # MONGO_GRIDFS_TOTAL_LIMIT=10485760

+ 1 - 0
apps/app/src/features/opentelemetry/server/index.ts

@@ -0,0 +1 @@
+export * from './start';

+ 76 - 0
apps/app/src/features/opentelemetry/server/logger.ts

@@ -0,0 +1,76 @@
+import { diag, type DiagLogger } from '@opentelemetry/api';
+
+import loggerFactory from '~/utils/logger';
+
+const logger = loggerFactory('growi:opentelemetry:diag');
+
+
+/**
+ * Adapter that forwards OpenTelemetry diagnostic (`diag`) output to the GROWI logger.
+ *
+ * Every call is normalized by `parseMessage` into a plain log message plus a
+ * structured data object before it is handed to the underlying logger.
+ */
+class DiagLoggerBunyanAdapter implements DiagLogger {
+
+  /**
+   * Returns true only for non-null, non-array objects, i.e. values whose
+   * properties can be merged into the log data as named fields.
+   */
+  private static isPlainRecord(value: unknown): value is Record<string, unknown> {
+    return typeof value === 'object' && value !== null && !Array.isArray(value);
+  }
+
+  /** Parses `text` as JSON and returns `undefined` when it is not valid JSON. */
+  private static tryParseJson(text: string): unknown {
+    try {
+      return JSON.parse(text);
+    }
+    catch (e) {
+      return undefined;
+    }
+  }
+
+  /**
+   * Converts one extra diag argument into log fields.
+   *
+   * - `null` / `undefined` contribute nothing.
+   * - A string holding a JSON object contributes that object's properties;
+   *   any other string is kept verbatim as `additionalInfo`.
+   * - An `Error` is stored under `err` with its name, message and stack copied
+   *   explicitly, because those properties are not enumerable and would be
+   *   lost by a plain spread.
+   * - A plain object contributes its own properties.
+   * - Anything else (numbers, booleans, arrays, ...) is kept as `additionalInfo`
+   *   instead of being dropped or spread into index keys.
+   */
+  private static toLogFields(arg: unknown): Record<string, unknown> {
+    if (arg == null) {
+      return {};
+    }
+
+    if (typeof arg === 'string') {
+      const parsed = DiagLoggerBunyanAdapter.tryParseJson(arg);
+      // JSON primitives and arrays must not be spread: '"abc"' would become { 0: 'a', 1: 'b', 2: 'c' }
+      return DiagLoggerBunyanAdapter.isPlainRecord(parsed) ? parsed : { additionalInfo: arg };
+    }
+
+    if (arg instanceof Error) {
+      return {
+        err: {
+          ...arg,
+          name: arg.name,
+          message: arg.message,
+          stack: arg.stack,
+        },
+      };
+    }
+
+    if (DiagLoggerBunyanAdapter.isPlainRecord(arg)) {
+      return arg;
+    }
+
+    return { additionalInfo: arg };
+  }
+
+  /**
+   * Splits a diag call into the message to log and the structured data to attach.
+   *
+   * @param message raw diag message; when it is a JSON object string, the object
+   *   becomes the base data and its string `message` property (if any) is used
+   *   as the log message
+   * @param args extra diag arguments, merged into the data in order
+   *   (later arguments win on key conflicts)
+   * @returns a `[logMessage, data]` tuple
+   */
+  private parseMessage(message: string, args: unknown[]): [logMessage: string, data: object] {
+    let logMessage = message;
+    let data: Record<string, unknown> = {};
+
+    // check whether the message is a JSON object string
+    const parsedMessage = DiagLoggerBunyanAdapter.tryParseJson(message);
+    if (DiagLoggerBunyanAdapter.isPlainRecord(parsedMessage)) {
+      data = parsedMessage;
+      // if parsed successfully, prefer its 'message' property as the log message
+      if (typeof parsedMessage.message === 'string') {
+        logMessage = parsedMessage.message;
+      }
+    }
+
+    // merge additional data
+    for (const arg of args) {
+      data = { ...data, ...DiagLoggerBunyanAdapter.toLogFields(arg) };
+    }
+
+    return [logMessage, data];
+  }
+
+  error(message: string, ...args: unknown[]): void {
+    logger.error(...this.parseMessage(message, args));
+  }
+
+  warn(message: string, ...args: unknown[]): void {
+    logger.warn(...this.parseMessage(message, args));
+  }
+
+  info(message: string, ...args: unknown[]): void {
+    logger.info(...this.parseMessage(message, args));
+  }
+
+  debug(message: string, ...args: unknown[]): void {
+    logger.debug(...this.parseMessage(message, args));
+  }
+
+  // diag's "verbose" level is forwarded to the logger's "trace" level
+  verbose(message: string, ...args: unknown[]): void {
+    logger.trace(...this.parseMessage(message, args));
+  }
+
+}
+
+
+/**
+ * Registers a DiagLoggerBunyanAdapter as the global OpenTelemetry diagnostic logger.
+ */
+export const initLogger = (): void => {
+  const adapter = new DiagLoggerBunyanAdapter();
+  diag.setLogger(adapter);
+};

+ 47 - 0
apps/app/src/features/opentelemetry/server/node-sdk-configuration.ts

@@ -0,0 +1,47 @@
+import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
+import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc';
+import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc';
+import { Resource } from '@opentelemetry/resources';
+import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics';
+import type { NodeSDKConfiguration } from '@opentelemetry/sdk-node';
+import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, SEMRESATTRS_SERVICE_INSTANCE_ID } from '@opentelemetry/semantic-conventions';
+
+
+/**
+ * Builds the options object passed to the NodeSDK constructor.
+ *
+ * @param instanceId value reported as the service instance id resource attribute
+ * @param version value reported as the service version resource attribute
+ * @returns resource, trace exporter, metric reader and instrumentations for the SDK
+ */
+export const generateNodeSDKConfiguration = (instanceId: string, version: string): Partial<NodeSDKConfiguration> => {
+  // resource attributes attached to the exported telemetry
+  const resource = new Resource({
+    [ATTR_SERVICE_NAME]: 'growi',
+    [ATTR_SERVICE_VERSION]: version,
+    [SEMRESATTRS_SERVICE_INSTANCE_ID]: instanceId,
+  });
+
+  const traceExporter = new OTLPTraceExporter();
+
+  // metrics are exported periodically with a 10000 ms interval
+  const metricReader = new PeriodicExportingMetricReader({
+    exporter: new OTLPMetricExporter(),
+    exportIntervalMillis: 10000,
+  });
+
+  const autoInstrumentations = getNodeAutoInstrumentations({
+    '@opentelemetry/instrumentation-bunyan': { enabled: false },
+    // disable fs instrumentation since this generates very large amount of traces
+    // see: https://opentelemetry.io/docs/languages/js/libraries/#registration
+    '@opentelemetry/instrumentation-fs': { enabled: false },
+  });
+
+  return {
+    resource,
+    traceExporter,
+    metricReader,
+    instrumentations: [autoInstrumentations],
+  };
+};
+
+// public async shutdownInstrumentation(): Promise<void> {
+//   await this.sdkInstance.shutdown();
+
+//   // NOTE: the restart code below does not work
+//   // neither spans nor metrics are emitted any more after it runs
+//   // restarting the SDK does not seem to be a supported usage in the first place?
+//   // see: https://github.com/open-telemetry/opentelemetry-specification/issues/27/
+//   // const sdk = new NodeSDK({...});
+//   // sdk.start();
+//   // await sdk.shutdown().catch(console.error);
+//   // const newSdk = new NodeSDK({...});
+//   // newSdk.start();
+// }

+ 87 - 0
apps/app/src/features/opentelemetry/server/start.ts

@@ -0,0 +1,87 @@
+import type { NodeSDK } from '@opentelemetry/sdk-node';
+
+import { configManager } from '~/server/service/config-manager';
+import loggerFactory from '~/utils/logger';
+
+
+const logger = loggerFactory('growi:opentelemetry:server');
+
+
+// The NodeSDK instance of this process; stays undefined until startInstrumentation() has created it.
+let sdkInstance: NodeSDK | undefined;
+
+/**
+ * Overwrite "OTEL_SDK_DISABLED" env var before sdk.start() is invoked if needed.
+ * Since otel library sees it.
+ *
+ * The env var is only touched when it explicitly contradicts GROWI's
+ * 'otel:enabled' config; an unset or unrecognized value is left as is.
+ * The value is compared case-insensitively and ignoring surrounding
+ * whitespace, so that e.g. "TRUE" is recognized as well as "true".
+ */
+function overwriteSdkDisabled(): void {
+  const instrumentationEnabled = configManager.getConfig('crowi', 'otel:enabled');
+  const sdkDisabled = process.env.OTEL_SDK_DISABLED?.trim().toLowerCase();
+
+  // config says "enabled" but the env var would disable the SDK
+  if (instrumentationEnabled && (sdkDisabled === 'true' || sdkDisabled === '1')) {
+    logger.warn("OTEL_SDK_DISABLED overwritten with 'false' since GROWI's 'otel:enabled' config is true.");
+    process.env.OTEL_SDK_DISABLED = 'false';
+    return;
+  }
+
+  // config says "disabled" but the env var explicitly enables the SDK
+  if (!instrumentationEnabled && (sdkDisabled === 'false' || sdkDisabled === '0')) {
+    logger.warn("OTEL_SDK_DISABLED is overwritten with 'true' since GROWI's 'otel:enabled' config is false.");
+    process.env.OTEL_SDK_DISABLED = 'true';
+  }
+}
+
+/**
+ * Starts the OpenTelemetry NodeSDK when GROWI's 'otel:enabled' config is truthy.
+ *
+ * Does nothing except logging a warning when an SDK instance already exists.
+ * The OpenTelemetry modules are imported dynamically, only when they are needed.
+ *
+ * @param version passed on to generateNodeSDKConfiguration() together with the service instance id
+ */
+export const startInstrumentation = async(version: string): Promise<void> => {
+  // an instance already exists: warn and bail out
+  if (sdkInstance != null) {
+    logger.warn('OpenTelemetry instrumentation already started');
+    return;
+  }
+
+  overwriteSdkDisabled();
+
+  // nothing more to do when the config does not enable instrumentation
+  if (!configManager.getConfig('crowi', 'otel:enabled')) {
+    return;
+  }
+
+  logger.info(`GROWI now collects anonymous telemetry.
+
+This data is used to help improve GROWI, but you can opt-out at any time.
+
+For more information, see https://docs.growi.org/en/admin-guide/telemetry.html.
+`);
+
+  // the global diag logger is installed in development only
+  if (process.env.NODE_ENV === 'development') {
+    const loggerModule = await import('./logger');
+    loggerModule.initLogger();
+  }
+
+  // instantiate NodeSDK
+  const sdkModule = await import('@opentelemetry/sdk-node');
+  const configurationModule = await import('./node-sdk-configuration');
+
+  const serviceInstanceId = configManager.getConfig('crowi', 'otel:serviceInstanceId');
+  const sdkConfiguration = configurationModule.generateNodeSDKConfiguration(serviceInstanceId, version);
+
+  sdkInstance = new sdkModule.NodeSDK(sdkConfiguration);
+  sdkInstance.start();
+};
+
+// public async shutdownInstrumentation(): Promise<void> {
+//   await this.sdkInstance.shutdown();
+
+//   // NOTE: the restart code below does not work
+//   // neither spans nor metrics are emitted any more after it runs
+//   // restarting the SDK does not seem to be a supported usage in the first place?
+//   // see: https://github.com/open-telemetry/opentelemetry-specification/issues/27/
+//   // const sdk = new NodeSDK({...});
+//   // sdk.start();
+//   // await sdk.shutdown().catch(console.error);
+//   // const newSdk = new NodeSDK({...});
+//   // newSdk.start();
+// }

+ 4 - 5
apps/app/src/server/crowi/index.js

@@ -14,6 +14,7 @@ import { KeycloakUserGroupSyncService } from '~/features/external-user-group/ser
 import { LdapUserGroupSyncService } from '~/features/external-user-group/server/service/ldap-user-group-sync';
 import { LdapUserGroupSyncService } from '~/features/external-user-group/server/service/ldap-user-group-sync';
 import OpenaiThreadDeletionCronService from '~/features/openai/server/services/thread-deletion-cron';
 import OpenaiThreadDeletionCronService from '~/features/openai/server/services/thread-deletion-cron';
 import OpenaiVectorStoreFileDeletionCronService from '~/features/openai/server/services/vector-store-file-deletion-cron';
 import OpenaiVectorStoreFileDeletionCronService from '~/features/openai/server/services/vector-store-file-deletion-cron';
+import { startInstrumentation } from '~/features/opentelemetry/server';
 import QuestionnaireService from '~/features/questionnaire/server/service/questionnaire';
 import QuestionnaireService from '~/features/questionnaire/server/service/questionnaire';
 import QuestionnaireCronService from '~/features/questionnaire/server/service/questionnaire-cron';
 import QuestionnaireCronService from '~/features/questionnaire/server/service/questionnaire-cron';
 import loggerFactory from '~/utils/logger';
 import loggerFactory from '~/utils/logger';
@@ -43,7 +44,6 @@ import { UserNotificationService } from '../service/user-notification';
 import { initializeYjsService } from '../service/yjs';
 import { initializeYjsService } from '../service/yjs';
 import { getModelSafely, getMongoUri, mongoOptions } from '../util/mongoose-utils';
 import { getModelSafely, getMongoUri, mongoOptions } from '../util/mongoose-utils';
 
 
-import { OpenTelemetry } from './opentelemetry';
 import { setupModelsDependentOnCrowi } from './setup-models';
 import { setupModelsDependentOnCrowi } from './setup-models';
 
 
 
 
@@ -146,6 +146,9 @@ Crowi.prototype.init = async function() {
   await this.setupSessionConfig();
   await this.setupSessionConfig();
   this.setupCron();
   this.setupCron();
 
 
+  // start OpenTelemetry
+  startInstrumentation(this.version);
+
   // setup messaging services
   // setup messaging services
   await this.setupS2sMessagingService();
   await this.setupS2sMessagingService();
   await this.setupSocketIoService();
   await this.setupSocketIoService();
@@ -460,10 +463,6 @@ Crowi.prototype.start = async function() {
   await this.init();
   await this.init();
   await this.buildServer();
   await this.buildServer();
 
 
-  // 具体的な設定値については、https://redmine.weseek.co.jp/issues/144351 で決定予定
-  const otel = new OpenTelemetry('next-app', this.version);
-  otel.startInstrumentation();
-
   // setup Next.js
   // setup Next.js
   this.nextApp = next({ dev });
   this.nextApp = next({ dev });
   await this.nextApp.prepare();
   await this.nextApp.prepare();

+ 0 - 85
apps/app/src/server/crowi/opentelemetry.ts

@@ -1,85 +0,0 @@
-import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
-import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc';
-import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc';
-import { Resource } from '@opentelemetry/resources';
-import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics';
-import type { NodeSDKConfiguration } from '@opentelemetry/sdk-node';
-import { NodeSDK } from '@opentelemetry/sdk-node';
-import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-node';
-import { SEMRESATTRS_SERVICE_NAME, SEMRESATTRS_SERVICE_INSTANCE_ID, SEMRESATTRS_SERVICE_VERSION } from '@opentelemetry/semantic-conventions';
-
-import loggerFactory from '~/utils/logger';
-
-import { configManager } from '../service/config-manager';
-
-const logger = loggerFactory('growi:opentelemetry');
-
-export class OpenTelemetry {
-
-  name: string;
-
-  version: string;
-
-  sdkInstance: NodeSDK;
-
-  constructor(name: string, version: string) {
-    this.name = name;
-    this.version = version;
-  }
-
-  private generateNodeSDKConfiguration(): Partial<NodeSDKConfiguration> {
-    return {
-      resource: new Resource({
-        [SEMRESATTRS_SERVICE_NAME]: this.name,
-        [SEMRESATTRS_SERVICE_INSTANCE_ID]: configManager.getConfig('crowi', 'instrumentation:serviceInstanceId'),
-        [SEMRESATTRS_SERVICE_VERSION]: this.version,
-      }),
-      traceExporter: new OTLPTraceExporter(),
-      metricReader: new PeriodicExportingMetricReader({
-        exporter: new OTLPMetricExporter(),
-        exportIntervalMillis: 10000,
-      }),
-      instrumentations: [getNodeAutoInstrumentations({
-        // disable fs instrumentation since this generates very large amount of traces
-        // see: https://opentelemetry.io/docs/languages/js/libraries/#registration
-        '@opentelemetry/instrumentation-fs': {
-          enabled: false,
-        },
-      })],
-    };
-  }
-
-  /**
-   * Overwrite "OTEL_SDK_DISABLED" env var before sdk.start() is invoked if needed.
-   * Since otel library sees it.
-   */
-  private overwriteSdkDisabled(): void {
-    const instrumentationEnabled = configManager.getConfig('crowi', 'instrumentation:enabled');
-    if (instrumentationEnabled != null && instrumentationEnabled === false) {
-      logger.warn("OTEL_SDK_DISABLED is set 'true' since GROWI's 'instrumentation:enabled' config is false.");
-      process.env.OTEL_SDK_DISABLED = 'true';
-    }
-  }
-
-  public startInstrumentation(): void {
-    this.overwriteSdkDisabled();
-
-    this.sdkInstance = new NodeSDK(this.generateNodeSDKConfiguration());
-    this.sdkInstance.start();
-  }
-
-  public async shutdownInstrumentation(): Promise<void> {
-    await this.sdkInstance.shutdown();
-
-    // メモ: 以下の restart コードは動かない
-    // span/metrics ともに何も出なくなる
-    // そもそも、restart するような使い方が出来なさそう?
-    // see: https://github.com/open-telemetry/opentelemetry-specification/issues/27/
-    // const sdk = new NodeSDK({...});
-    // sdk.start();
-    // await sdk.shutdown().catch(console.error);
-    // const newSdk = new NodeSDK({...});
-    // newSdk.start();
-  }
-
-}

+ 10 - 4
apps/app/src/server/service/config-loader.ts

@@ -751,17 +751,23 @@ const ENV_VAR_NAME_TO_CONFIG_INFO: Record<string, EnvConfig> = {
     type: ValueType.NUMBER,
     type: ValueType.NUMBER,
     default: 172800, // 2 days
     default: 172800, // 2 days
   },
   },
-  OPENTELEMETRY_INSTRUMENTATION_ENABLED: {
+  OPENTELEMETRY_ENABLED: {
     ns: 'crowi',
     ns: 'crowi',
-    key: 'instrumentation:enabled',
+    key: 'otel:enabled',
     type: ValueType.BOOLEAN,
     type: ValueType.BOOLEAN,
-    default: null,
+    default: true,
+  },
+  OPENTELEMETRY_IS_APP_SITE_URL_HASHED: {
+    ns: 'crowi',
+    key: 'otel:isAppSiteUrlHashed',
+    type: ValueType.BOOLEAN,
+    default: false,
   },
   },
   // TODO: fix after the decision of the instrumentation data specification
   // TODO: fix after the decision of the instrumentation data specification
   // https://redmine.weseek.co.jp/issues/144351
   // https://redmine.weseek.co.jp/issues/144351
   OPENTELEMETRY_SERVICE_INSTANCE_ID: {
   OPENTELEMETRY_SERVICE_INSTANCE_ID: {
     ns: 'crowi',
     ns: 'crowi',
-    key: 'instrumentation:serviceInstanceId',
+    key: 'otel:serviceInstanceId',
     type: ValueType.STRING,
     type: ValueType.STRING,
     default: null,
     default: null,
   },
   },