Yuki Takei 9 месяцев назад
Родитель
Сommit
52945b55cc

+ 5 - 4
apps/app/src/features/opentelemetry/docs/improvement-task.md

@@ -22,12 +22,13 @@
   - [ ] 検索回数カウンター
 
 ### Phase 2: データ匿名化
-- [ ] **HTTP匿名化**
-  - [ ] クエリパラメータマスキング(`q`, `query`, `search`)
+- [x] **HTTP匿名化**
+  - [x] クエリパラメータマスキング(`q`, `query`, `search`)
+  - [x] HTTP Instrumentation統合
   - [ ] 認証ヘッダー除外(`authorization`, `cookie`)
   - [ ] IPアドレス部分マスキング
-- [ ] **設定機能**
-  - [ ] 匿名化有効/無効制御
+- [x] **設定機能**
+  - [x] 匿名化有効/無効制御(環境変数)
   - [ ] カスタム除外パターン設定
 
 ### Phase 3: 統合・テスト

+ 148 - 0
apps/app/src/features/opentelemetry/server/anonymization/anonymize-http-requests.ts

@@ -0,0 +1,148 @@
+import { diag } from '@opentelemetry/api';
+import type { Span } from '@opentelemetry/api';
+import type { InstrumentationConfigMap } from '@opentelemetry/auto-instrumentations-node';
+
+// Component logger for this module, created through the OpenTelemetry diag API under the namespace below.
+const logger = diag.createComponentLogger({ namespace: 'growi:anonymization:query' });
+
+/**
+ * Settings that control how query parameters are masked by the helpers in this module.
+ */
+export interface AnonymizeHttpRequestsConfig {
+  /** Names of the query parameters whose values are replaced with `maskPattern`. */
+  sensitiveParams: string[];
+  /** Text written in place of each sensitive parameter value. */
+  maskPattern: string;
+}
+
+/**
+ * Escape every regular-expression metacharacter in the given text so that it
+ * can be embedded in a `RegExp` source and matched literally.
+ *
+ * @param string - Text to escape.
+ * @returns The text with each metacharacter prefixed by a backslash.
+ */
+function escapeRegExp(string: string): string {
+  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
+  return string.replace(metaCharacters, (matched) => `\\${matched}`);
+}
+
+/**
+ * Mask the values of sensitive parameters inside a raw query string.
+ *
+ * A parameter is recognised, case-insensitively, when its name appears at the
+ * very start of the text or right after a `?` or `&`; every occurrence is masked.
+ *
+ * @param query - Text containing `name=value` pairs separated by `&`.
+ * @param config - Parameter names to mask and the replacement text.
+ * @returns The text with each sensitive value replaced by `config.maskPattern`.
+ */
+function anonymizeQueryString(query: string, config: AnonymizeHttpRequestsConfig): string {
+  return config.sensitiveParams.reduce((masked, param) => {
+    // Match with (?:^|[?&])param=([^&]*) pattern
+    const pattern = new RegExp(`((?:^|[?&])${escapeRegExp(param)}=)([^&]*)`, 'gi');
+    // A replacer function writes maskPattern literally; as a replacement string,
+    // sequences such as `$&`, `$1` or `$$` inside maskPattern would be expanded.
+    return masked.replace(pattern, (_match: string, prefix: string) => `${prefix}${config.maskPattern}`);
+  }, query);
+}
+
+/**
+ * Mask sensitive query parameters of a URL.
+ *
+ * The URL is parsed with the WHATWG `URL` class. Parameter names are compared
+ * case-insensitively, which matches the regex-based fallback used when the
+ * text cannot be parsed (for example a relative target).
+ *
+ * @param url - URL text to anonymize.
+ * @param config - Parameter names to mask and the replacement text.
+ * @returns The original text when nothing was masked, otherwise the re-serialized URL
+ *          (or the regex-masked text when parsing failed).
+ */
+function anonymizeUrl(url: string, config: AnonymizeHttpRequestsConfig): string {
+  let urlObj: URL;
+  try {
+    urlObj = new URL(url);
+  }
+  catch {
+    // Fallback to regex if URL parsing fails
+    return anonymizeQueryString(url, config);
+  }
+
+  const sensitiveNames = new Set(config.sensitiveParams.map((param) => param.toLowerCase()));
+  // Snapshot the distinct keys first: set() mutates the collection being iterated
+  const presentKeys = new Set(urlObj.searchParams.keys());
+
+  let modified = false;
+  presentKeys.forEach((key) => {
+    if (sensitiveNames.has(key.toLowerCase())) {
+      urlObj.searchParams.set(key, config.maskPattern);
+      modified = true;
+    }
+  });
+
+  return modified ? urlObj.toString() : url;
+}
+
+/**
+ * Mask sensitive parameters in a request target such as "/path?query=value".
+ *
+ * Everything before the first "?" is kept verbatim; the remainder is handed to
+ * `anonymizeQueryString`. A target without "?" is returned unchanged.
+ *
+ * @param target - Request target (path plus optional query).
+ * @param config - Parameter names to mask and the replacement text.
+ * @returns The target with sensitive query values masked.
+ */
+function anonymizeTarget(target: string, config: AnonymizeHttpRequestsConfig): string {
+  const separatorAt = target.indexOf('?');
+  const hasQuery = separatorAt !== -1;
+  if (!hasQuery) {
+    return target;
+  }
+
+  const pathPart = target.slice(0, separatorAt);
+  const maskedQuery = anonymizeQueryString(target.slice(separatorAt + 1), config);
+
+  return [pathPart, maskedQuery].join('?');
+}
+
+/**
+ * Mask sensitive query parameters in the URL-related attributes of a span.
+ *
+ * The attributes `http.url`, `url.query` and `http.target` are inspected in that
+ * order; each one that holds a string is anonymized and written back only when
+ * the anonymized value differs. Any error is logged and swallowed so that
+ * anonymization never breaks the caller.
+ *
+ * @param span - Span whose attributes are inspected and updated.
+ * @param config - Parameter names to mask and the replacement text.
+ */
+function anonymizeHttpRequests(span: Span, config: AnonymizeHttpRequestsConfig): void {
+  try {
+    // Attributes are read from the span object itself; depending on the span
+    // implementation they may live under `attributes` or `_attributes`
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const spanInternals = span as any;
+    const recorded = spanInternals.attributes || spanInternals._attributes || {};
+
+    // Anonymize one attribute in place, skipping non-string and unchanged values
+    const maskAttribute = (key: string, mask: (value: string) => string): void => {
+      const current = recorded[key];
+      if (typeof current !== 'string') {
+        return;
+      }
+
+      const masked = mask(current);
+      if (masked === current) {
+        return;
+      }
+
+      span.setAttribute(key, masked);
+      logger.debug(`Anonymized ${key} attribute`);
+    };
+
+    maskAttribute('http.url', (value) => anonymizeUrl(value, config));
+    maskAttribute('url.query', (value) => anonymizeQueryString(value, config));
+    // http.target holds path + query parameters
+    maskAttribute('http.target', (value) => anonymizeTarget(value, config));
+  }
+  catch (error) {
+    logger.error('Failed to anonymize search queries', { error });
+  }
+}
+
+/** Query parameters masked by the request hook. */
+const requestHookSensitiveParams = ['q', 'query', 'search', 'term', 'keyword'];
+
+/** Text written in place of each masked parameter value. */
+const requestHookMaskPattern = '***';
+
+/**
+ * Overwrite every sensitive parameter present in `urlObj` with the mask.
+ *
+ * @param urlObj - Parsed URL, modified in place.
+ * @returns `true` when at least one parameter was masked.
+ */
+function maskRequestHookParams(urlObj: URL): boolean {
+  let modified = false;
+  requestHookSensitiveParams.forEach((param) => {
+    if (urlObj.searchParams.has(param)) {
+      urlObj.searchParams.set(param, requestHookMaskPattern);
+      modified = true;
+    }
+  });
+  return modified;
+}
+
+/**
+ * HTTP instrumentation options that anonymize search-like query parameters.
+ *
+ * The request hook parses `request.url`; when it contains a sensitive parameter,
+ * `http.target` and `http.url` are overwritten with masked values. Errors are
+ * logged and swallowed so that anonymization never breaks request handling.
+ */
+export const httpInstrumentationConfig: InstrumentationConfigMap['@opentelemetry/instrumentation-http'] = {
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  requestHook: (span: any, request: any) => {
+    try {
+      // Anonymize from request URL
+      const url = request.url || '';
+      if (!url.includes('?')) {
+        return;
+      }
+
+      // request.url may be a bare target ("/path?q=x"); the base only makes it parseable
+      const targetUrl = new URL(url, 'http://localhost');
+      if (!maskRequestHookParams(targetUrl)) {
+        return;
+      }
+
+      span.setAttribute('http.target', targetUrl.pathname + targetUrl.search);
+
+      // Last-resort value: for a relative request.url it carries the placeholder origin,
+      // but it never contains the unmasked query
+      let maskedUrl = targetUrl.toString();
+
+      // Prefer masking the http.url already stored on the span so that the real scheme
+      // and host are kept.
+      // NOTE(review): assumes the instrumentation has recorded http.url before calling
+      // this hook and that the span exposes `attributes` / `_attributes` - confirm for
+      // the installed version.
+      const recordedUrl = (span.attributes || span._attributes || {})['http.url'];
+      if (typeof recordedUrl === 'string') {
+        try {
+          const recordedUrlObj = new URL(recordedUrl);
+          maskRequestHookParams(recordedUrlObj);
+          maskedUrl = recordedUrlObj.toString();
+        }
+        catch {
+          // Recorded value is not a parseable absolute URL; keep the last-resort value
+        }
+      }
+
+      span.setAttribute('http.url', maskedUrl);
+    }
+    catch (error) {
+      // eslint-disable-next-line no-console
+      console.error('Failed to anonymize request:', error);
+    }
+  },
+};

+ 1 - 0
apps/app/src/features/opentelemetry/server/anonymization/index.ts

@@ -0,0 +1 @@
+export * from './anonymize-http-requests';

+ 10 - 1
apps/app/src/features/opentelemetry/server/node-sdk-configuration.ts

@@ -8,6 +8,7 @@ import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, SEMRESATTRS_SERVICE_INSTANCE_I
 
 import { getGrowiVersion } from '~/utils/growi-version';
 
+import { httpInstrumentationConfig as httpInstrumentationConfigForAnonymize } from './anonymization';
 import { addApplicationMetrics, addSystemMetrics } from './custom-metrics';
 
 type Configuration = Partial<NodeSDKConfiguration> & {
@@ -17,7 +18,7 @@ type Configuration = Partial<NodeSDKConfiguration> & {
 let resource: Resource;
 let configuration: Configuration;
 
-export const generateNodeSDKConfiguration = (serviceInstanceId?: string): Configuration => {
+export const generateNodeSDKConfiguration = (serviceInstanceId?: string, enableAnonymization = false): Configuration => {
   if (configuration == null) {
     const version = getGrowiVersion();
 
@@ -26,6 +27,9 @@ export const generateNodeSDKConfiguration = (serviceInstanceId?: string): Config
       [ATTR_SERVICE_VERSION]: version,
     });
 
+    // Data anonymization configuration
+    const httpInstrumentationConfig = enableAnonymization ? httpInstrumentationConfigForAnonymize : {};
+
     configuration = {
       resource,
       traceExporter: new OTLPTraceExporter(),
@@ -41,6 +45,11 @@ export const generateNodeSDKConfiguration = (serviceInstanceId?: string): Config
         '@opentelemetry/instrumentation-fs': {
           enabled: false,
         },
+        // HTTP instrumentation with anonymization
+        '@opentelemetry/instrumentation-http': {
+          enabled: true,
+          ...httpInstrumentationConfig,
+        },
       })],
     };
 

+ 7 - 2
apps/app/src/features/opentelemetry/server/node-sdk.ts

@@ -66,8 +66,10 @@ For more information, see https://docs.growi.org/en/admin-guide/admin-cookbook/t
     // instanciate NodeSDK
     const { NodeSDK } = await import('@opentelemetry/sdk-node');
     const { generateNodeSDKConfiguration } = await import('./node-sdk-configuration');
+    // read the anonymization flag from the env-sourced configuration
+    const anonymizationEnabled = configManager.getConfig('otel:anonymizeInBestEffort', ConfigSource.env);
 
-    sdkInstance = new NodeSDK(generateNodeSDKConfiguration());
+    sdkInstance = new NodeSDK(generateNodeSDKConfiguration(undefined, anonymizationEnabled));
   }
 };
 
@@ -84,8 +86,11 @@ export const detectServiceInstanceId = async(): Promise<void> => {
     const serviceInstanceId = configManager.getConfig('otel:serviceInstanceId')
       ?? configManager.getConfig('app:serviceInstanceId');
 
+    // read the anonymization flag from the env-sourced configuration
+    const anonymizationEnabled = configManager.getConfig('otel:anonymizeInBestEffort', ConfigSource.env);
+
     // Update resource with new service instance id
-    const newConfig = generateNodeSDKConfiguration(serviceInstanceId);
+    const newConfig = generateNodeSDKConfiguration(serviceInstanceId, anonymizationEnabled);
     setResource(sdkInstance, newConfig.resource);
   }
 };

+ 5 - 0
apps/app/src/server/service/config-manager/config-definition.ts

@@ -268,6 +268,7 @@ export const CONFIG_KEYS = [
   // OpenTelemetry Settings
   'otel:enabled',
   'otel:isAppSiteUrlHashed',
+  'otel:anonymizeInBestEffort',
   'otel:serviceInstanceId',
 
   // S2S Messaging Pubsub Settings
@@ -1133,6 +1134,10 @@ export const CONFIG_DEFINITIONS = {
     envVarName: 'OPENTELEMETRY_IS_APP_SITE_URL_HASHED',
     defaultValue: false,
   }),
+  'otel:anonymizeInBestEffort': defineConfig<boolean>({
+    envVarName: 'OPENTELEMETRY_ANNONYMIZE_IN_BEST_EFFORT',
+    defaultValue: false,
+  }),
   'otel:serviceInstanceId': defineConfig<string | undefined>({
     envVarName: 'OPENTELEMETRY_SERVICE_INSTANCE_ID',
     defaultValue: undefined,