Просмотр исходного кода

Merge pull request #9902 from weseek/fix/set-service-instance-id-for-otel

fix: Set OpenTelemetry resource attribute `service.instance.id`
mergify[bot] 11 месяцев назад
Родитель
Сommit
f7c9ab63a7

+ 0 - 14
apps/app/src/features/opentelemetry/server/node-sdk-configuration.ts

@@ -51,17 +51,3 @@ export const generateNodeSDKConfiguration = (serviceInstanceId?: string): Config
 
   return configuration;
 };
-
-// public async shutdownInstrumentation(): Promise<void> {
-//   await this.sdkInstance.shutdown();
-
-//   // メモ: 以下の restart コードは動かない
-//   // span/metrics ともに何も出なくなる
-//   // そもそも、restart するような使い方が出来なさそう?
-//   // see: https://github.com/open-telemetry/opentelemetry-specification/issues/27/
-//   // const sdk = new NodeSDK({...});
-//   // sdk.start();
-//   // await sdk.shutdown().catch(console.error);
-//   // const newSdk = new NodeSDK({...});
-//   // newSdk.start();
-// }

+ 33 - 0
apps/app/src/features/opentelemetry/server/node-sdk-resource.ts

@@ -0,0 +1,33 @@
+import { Resource } from '@opentelemetry/resources';
+import type { NodeSDK } from '@opentelemetry/sdk-node';
+
+/**
+ * Get resource from SDK instance
+ * Note: This uses internal API of NodeSDK
+ */
+export const getResource = (sdk: NodeSDK): Resource => {
+  // This cast is necessary as _resource is a private property
+  const resource = (sdk as any)._resource;
+  if (!(resource instanceof Resource)) {
+    throw new Error('Failed to access SDK resource');
+  }
+  return resource;
+};
+
+/**
+ * Set resource to SDK instance
+ * Note: This uses internal API of NodeSDK
+ * @throws Error if resource cannot be set
+ */
+export const setResource = (sdk: NodeSDK, resource: Resource): void => {
+  // Verify that we can access the _resource property
+  try {
+    getResource(sdk);
+  }
+  catch (e) {
+    throw new Error('Failed to access SDK resource');
+  }
+
+  // This cast is necessary as _resource is a private property
+  (sdk as any)._resource = resource;
+};

+ 135 - 0
apps/app/src/features/opentelemetry/server/node-sdk.spec.ts

@@ -0,0 +1,135 @@
+import { ConfigSource } from '@growi/core/dist/interfaces';
+import { Resource } from '@opentelemetry/resources';
+import { NodeSDK } from '@opentelemetry/sdk-node';
+
+import { configManager } from '~/server/service/config-manager';
+
+import { detectServiceInstanceId, initInstrumentation } from './node-sdk';
+import { getResource } from './node-sdk-resource';
+import { getSdkInstance, resetSdkInstance } from './node-sdk.testing';
+
+// Only mock configManager as it's external to what we're testing
+vi.mock('~/server/service/config-manager', () => ({
+  configManager: {
+    getConfig: vi.fn(),
+    loadConfigs: vi.fn(),
+  },
+}));
+
+describe('node-sdk', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    vi.resetModules();
+    resetSdkInstance();
+
+    // Reset configManager mock implementation
+    vi.mocked(configManager.getConfig).mockImplementation((key: string, source?: ConfigSource) => {
+      // For otel:enabled, always expect ConfigSource.env
+      if (key === 'otel:enabled') {
+        return source === ConfigSource.env ? true : undefined;
+      }
+      return undefined;
+    });
+  });
+
+  describe('detectServiceInstanceId', () => {
+    it('should update service.instance.id when app:serviceInstanceId is available', async() => {
+      // Initialize SDK first
+      await initInstrumentation();
+
+      // Get instance for testing
+      const sdkInstance = getSdkInstance();
+      expect(sdkInstance).toBeDefined();
+      expect(sdkInstance).toBeInstanceOf(NodeSDK);
+
+      // Verify initial state (service.instance.id should not be set)
+      if (sdkInstance == null) {
+        throw new Error('SDK instance should be defined');
+      }
+
+      // Mock app:serviceInstanceId is available
+      vi.mocked(configManager.getConfig).mockImplementation((key: string, source?: ConfigSource) => {
+        // For otel:enabled, always expect ConfigSource.env
+        if (key === 'otel:enabled') {
+          return source === ConfigSource.env ? true : undefined;
+        }
+
+        // For service instance IDs, only respond when no source is specified
+        if (key === 'app:serviceInstanceId') return 'test-instance-id';
+        return undefined;
+      });
+
+      const resource = getResource(sdkInstance);
+      expect(resource).toBeInstanceOf(Resource);
+      expect(resource.attributes['service.instance.id']).toBeUndefined();
+
+      // Call detectServiceInstanceId
+      await detectServiceInstanceId();
+
+      // Verify that resource was updated with app:serviceInstanceId
+      const updatedResource = getResource(sdkInstance);
+      expect(updatedResource.attributes['service.instance.id']).toBe('test-instance-id');
+    });
+
+    it('should update service.instance.id with otel:serviceInstanceId if available', async() => {
+      // Initialize SDK
+      await initInstrumentation();
+
+      // Get instance and verify initial state
+      const sdkInstance = getSdkInstance();
+      if (sdkInstance == null) {
+        throw new Error('SDK instance should be defined');
+      }
+      const resource = getResource(sdkInstance);
+      expect(resource.attributes['service.instance.id']).toBeUndefined();
+
+      // Mock otel:serviceInstanceId is available
+      vi.mocked(configManager.getConfig).mockImplementation((key: string, source?: ConfigSource) => {
+        // For otel:enabled, always expect ConfigSource.env
+        if (key === 'otel:enabled') {
+          return source === ConfigSource.env ? true : undefined;
+        }
+
+        // For service instance IDs, only respond when no source is specified
+        if (source === undefined) {
+          if (key === 'otel:serviceInstanceId') return 'otel-instance-id';
+          if (key === 'app:serviceInstanceId') return 'test-instance-id';
+        }
+
+        return undefined;
+      });
+
+      // Call detectServiceInstanceId
+      await detectServiceInstanceId();
+
+      // Verify that otel:serviceInstanceId was used
+      const updatedResource = getResource(sdkInstance);
+      expect(updatedResource.attributes['service.instance.id']).toBe('otel-instance-id');
+    });
+
+    it('should not create SDK instance if instrumentation is disabled', async() => {
+      // Mock instrumentation as disabled
+      vi.mocked(configManager.getConfig).mockImplementation((key: string, source?: ConfigSource) => {
+        // For otel:enabled, always expect ConfigSource.env and return false
+        if (key === 'otel:enabled') {
+          return source === ConfigSource.env ? false : undefined;
+        }
+        return undefined;
+      });
+
+      // Initialize SDK
+      await initInstrumentation();
+
+      // Verify that no SDK instance was created
+      const sdkInstance = getSdkInstance();
+      expect(sdkInstance).toBeUndefined();
+
+      // Call detectServiceInstanceId
+      await detectServiceInstanceId();
+
+      // Verify that still no SDK instance exists
+      const updatedSdkInstance = getSdkInstance();
+      expect(updatedSdkInstance).toBeUndefined();
+    });
+  });
+});

+ 24 - 0
apps/app/src/features/opentelemetry/server/node-sdk.testing.ts

@@ -0,0 +1,24 @@
+/**
+ * This module provides testing APIs for node-sdk.ts
+ * It should be imported only in test files
+ */
+
+import type { NodeSDK } from '@opentelemetry/sdk-node';
+
+import { __testing__ } from './node-sdk';
+
+/**
+ * Get the current SDK instance
+ * This function should only be used in tests
+ */
+export const getSdkInstance = (): NodeSDK | undefined => {
+  return __testing__.getSdkInstance();
+};
+
+/**
+ * Reset the SDK instance
+ * This function should be used to clean up between tests
+ */
+export const resetSdkInstance = (): void => {
+  __testing__.reset();
+};

+ 30 - 23
apps/app/src/features/opentelemetry/server/node-sdk.ts

@@ -4,10 +4,11 @@ import type { NodeSDK } from '@opentelemetry/sdk-node';
 import { configManager } from '~/server/service/config-manager';
 import loggerFactory from '~/utils/logger';
 
-const logger = loggerFactory('growi:opentelemetry:server');
+import { setResource } from './node-sdk-resource';
 
+const logger = loggerFactory('growi:opentelemetry:server');
 
-let sdkInstance: NodeSDK;
+let sdkInstance: NodeSDK | undefined;
 
 /**
  * Overwrite "OTEL_SDK_DISABLED" env var before sdk.start() is invoked if needed.
@@ -33,10 +34,9 @@ function overwriteSdkDisabled(): void {
     process.env.OTEL_SDK_DISABLED = 'true';
     return;
   }
-
 }
 
-export const startInstrumentation = async(): Promise<void> => {
+export const initInstrumentation = async(): Promise<void> => {
   if (sdkInstance != null) {
     logger.warn('OpenTelemetry instrumentation already started');
     return;
@@ -49,7 +49,6 @@ export const startInstrumentation = async(): Promise<void> => {
 
   const instrumentationEnabled = configManager.getConfig('otel:enabled', ConfigSource.env);
   if (instrumentationEnabled) {
-
     logger.info(`GROWI now collects anonymous telemetry.
 
 This data is used to help improve GROWI, but you can opt-out at any time.
@@ -69,35 +68,43 @@ For more information, see https://docs.growi.org/en/admin-guide/admin-cookbook/t
     const { generateNodeSDKConfiguration } = await import('./node-sdk-configuration');
 
     sdkInstance = new NodeSDK(generateNodeSDKConfiguration());
-    sdkInstance.start();
   }
 };
 
-export const initServiceInstanceId = async(): Promise<void> => {
+export const detectServiceInstanceId = async(): Promise<void> => {
   const instrumentationEnabled = configManager.getConfig('otel:enabled', ConfigSource.env);
 
   if (instrumentationEnabled) {
+    if (sdkInstance == null) {
+      throw new Error('OpenTelemetry instrumentation is not initialized');
+    }
+
     const { generateNodeSDKConfiguration } = await import('./node-sdk-configuration');
 
     const serviceInstanceId = configManager.getConfig('otel:serviceInstanceId')
       ?? configManager.getConfig('app:serviceInstanceId');
 
-    // overwrite resource
-    const updatedResource = generateNodeSDKConfiguration(serviceInstanceId).resource;
-    (sdkInstance as any).resource = updatedResource;
+    // Update resource with new service instance id
+    const newConfig = generateNodeSDKConfiguration(serviceInstanceId);
+    setResource(sdkInstance, newConfig.resource);
   }
 };
 
-// public async shutdownInstrumentation(): Promise<void> {
-//   await this.sdkInstance.shutdown();
-
-//   // メモ: 以下の restart コードは動かない
-//   // span/metrics ともに何も出なくなる
-//   // そもそも、restart するような使い方が出来なさそう?
-//   // see: https://github.com/open-telemetry/opentelemetry-specification/issues/27/
-//   // const sdk = new NodeSDK({...});
-//   // sdk.start();
-//   // await sdk.shutdown().catch(console.error);
-//   // const newSdk = new NodeSDK({...});
-//   // newSdk.start();
-// }
+export const startOpenTelemetry = (): void => {
+  const instrumentationEnabled = configManager.getConfig('otel:enabled', ConfigSource.env);
+
+  if (instrumentationEnabled && sdkInstance != null) {
+    if (sdkInstance == null) {
+      throw new Error('OpenTelemetry instrumentation is not initialized');
+    }
+    sdkInstance.start();
+  }
+};
+
+// For testing purposes only
+export const __testing__ = {
+  getSdkInstance: (): NodeSDK | undefined => sdkInstance,
+  reset: (): void => {
+    sdkInstance = undefined;
+  },
+};

+ 6 - 4
apps/app/src/server/app.ts

@@ -1,6 +1,6 @@
 import type Logger from 'bunyan';
 
-import { initServiceInstanceId, startInstrumentation } from '~/features/opentelemetry/server';
+import { initInstrumentation, detectServiceInstanceId, startOpenTelemetry } from '~/features/opentelemetry/server';
 import loggerFactory from '~/utils/logger';
 import { hasProcessFlag } from '~/utils/process-utils';
 
@@ -20,14 +20,16 @@ process.on('unhandledRejection', (reason, p) => {
 
 async function main() {
   try {
-    // start OpenTelemetry
-    await startInstrumentation();
+    // Initialize OpenTelemetry
+    await initInstrumentation();
 
     const Crowi = (await import('./crowi')).default;
     const growi = new Crowi();
     const server = await growi.start();
 
-    await initServiceInstanceId();
+    // Start OpenTelemetry
+    await detectServiceInstanceId();
+    startOpenTelemetry();
 
     if (hasProcessFlag('ci')) {
       logger.info('"--ci" flag is detected. Exit process.');