فهرست منبع

Add page access anonymization module and corresponding tests

Yuki Takei 9 ماه پیش
والد
کامیت
e44a07a5f5

+ 2 - 0
apps/app/src/features/opentelemetry/server/anonymization/handlers/index.ts

@@ -1,5 +1,6 @@
 import type { AnonymizationModule } from '../interfaces/anonymization-module';
 
+import { pageAccessModule } from './page-access-handler';
 import { pageApiModule } from './page-api-handler';
 import { pageListingApiModule } from './page-listing-api-handler';
 import { searchApiModule } from './search-api-handler';
@@ -11,4 +12,5 @@ export const anonymizationModules: AnonymizationModule[] = [
   searchApiModule,
   pageListingApiModule,
   pageApiModule,
+  pageAccessModule,
 ];

+ 96 - 0
apps/app/src/features/opentelemetry/server/anonymization/handlers/page-access-handler.spec.ts

@@ -0,0 +1,96 @@
+import type { IncomingMessage } from 'http';
+
+import { describe, it, expect } from 'vitest';
+
+import { pageAccessModule } from './page-access-handler';
+
+describe('pageAccessModule', () => { // Spec for the page-access anonymization module: which URLs it claims and how it rewrites them
+  describe('canHandle', () => { // table-driven: each row is a URL and whether the module is expected to claim it
+    it.each`
+      description                   | url                            | expected
+      ${'root path'}                | ${'/'}                         | ${false}
+      ${'API endpoint'}             | ${'/_api/v3/search'}           | ${false}
+      ${'static resource'}          | ${'/static/css/style.css'}     | ${false}
+      ${'favicon'}                  | ${'/favicon.ico'}              | ${false}
+      ${'assets'}                   | ${'/assets/image.png'}         | ${false}
+      ${'Next.js resource'}         | ${'/_next/chunk.js'}           | ${false}
+      ${'file with extension'}      | ${'/file.pdf'}                 | ${false}
+      ${'page path'}                | ${'/user/john/projects'}       | ${true}
+      ${'ObjectId path'}            | ${'/58a4569921a8424d00a1aa0e'} | ${true}
+      `('should return $expected for $description', ({ url, expected }) => {
+      const result = pageAccessModule.canHandle(url);
+      expect(result).toBe(expected);
+    });
+  });
+
+  describe('handle', () => {
+    describe('URL path anonymization', () => { // an expectedPath of null means handle() must return null (path left untouched)
+      it.each`
+        description                     | url                                 | expectedPath
+        ${'root path'}                  | ${'/'}                              | ${null}
+        ${'empty path'}                 | ${''}                               | ${null}
+        ${'ObjectId path'}              | ${'/58a4569921a8424d00a1aa0e'}      | ${null}
+        ${'regular page path'}          | ${'/user/john/projects'}            | ${'/[HASHED:352fb6c6866e6ae8]'}
+        ${'Japanese page path'}         | ${'/path/to/ページ'}                | ${'/[HASHED:691184f7b886e43b]'}
+        ${'path with trailing slash'}   | ${'/user/john/'}                    | ${'/[HASHED:f15cf84e6a68ab30]/'}
+        ${'path with multiple slashes'} | ${'///user///john///'}              | ${'/[HASHED:96d9632f363564cc]/'}
+        ${'complex path'}               | ${'/wiki/project/documentation'}    | ${'/[HASHED:22ca1a8b9f281349]'}
+        ${'path with special chars'}    | ${'/user-name_123/project!'}        | ${'/[HASHED:7aa6a8f4468baa96]'}
+        ${'mixed case ObjectId'}        | ${'/58A4569921a8424d00a1aa0E'}      | ${null}
+        ${'lowercase ObjectId'}         | ${'/58a4569921a8424d00a1aa0e'}      | ${null}
+      `('should handle $description', ({ url, expectedPath }) => { // NOTE(review): the 'ObjectId path' and 'lowercase ObjectId' rows use the same URL
+        const mockRequest = {} as IncomingMessage; // an empty stub is passed as the request in every handle() test here
+        const result = pageAccessModule.handle(mockRequest, url); // NOTE(review): for '///user///john///' the leading '//' presumably makes new URL() read 'user' as a host, so only 'john' would be hashed - confirm this is intended
+
+        if (expectedPath === null) {
+          expect(result).toBeNull();
+        }
+        else {
+          expect(result).toEqual({
+            'http.target': expectedPath,
+          });
+        }
+      });
+    });
+
+    it('should preserve query parameters', () => {
+      const mockRequest = {} as IncomingMessage;
+      const url = '/user/john?tab=projects&sort=date'; // expected hash below equals the one in the 'path with trailing slash' row
+      const result = pageAccessModule.handle(mockRequest, url);
+
+      expect(result).toEqual({
+        'http.target': '/[HASHED:f15cf84e6a68ab30]?tab=projects&sort=date',
+      });
+    });
+
+    it('should handle complex query parameters', () => {
+      const mockRequest = {} as IncomingMessage;
+      const url = '/wiki/page?search=test&tags[]=tag1&tags[]=tag2&limit=10';
+      const result = pageAccessModule.handle(mockRequest, url);
+
+      expect(result).toEqual({
+        'http.target': '/[HASHED:2f4a824f8eacbc70]?search=test&tags[]=tag1&tags[]=tag2&limit=10',
+      });
+    });
+
+    it('should handle invalid URLs gracefully', () => {
+      const mockRequest = {} as IncomingMessage;
+      const invalidUrl = 'not-a-valid-url';
+      const result = pageAccessModule.handle(mockRequest, invalidUrl);
+
+      // A bare word is not rejected: the expected target below has a leading slash,
+      // i.e. the input ends up being processed as a path and hashed like any other
+      expect(result).toEqual({
+        'http.target': '/[HASHED:66df542c298792e1]',
+      });
+    });
+
+    it('should handle URL parsing errors gracefully', () => {
+      const mockRequest = {} as IncomingMessage;
+      // This input should make URL parsing throw; handle() is expected to return null instead of propagating the error
+      const result = pageAccessModule.handle(mockRequest, 'http://[invalid');
+
+      expect(result).toBeNull();
+    });
+  });
+});

+ 122 - 0
apps/app/src/features/opentelemetry/server/anonymization/handlers/page-access-handler.ts

@@ -0,0 +1,122 @@
+import { createHash } from 'crypto';
+import type { IncomingMessage } from 'http';
+
+import { isCreatablePage } from '@growi/core/dist/utils/page-path-utils';
+import { diag } from '@opentelemetry/api';
+
+import { ATTR_HTTP_TARGET } from '../../semconv';
+import type { AnonymizationModule } from '../interfaces/anonymization-module';
+
+const logger = diag.createComponentLogger({ namespace: 'growi:anonymization:page-access-handler' });
+
+/**
+ * Check if a string is a MongoDB ObjectId (24 hex characters)
+ *
+ * The pattern is anchored at both ends and accepts upper- and lower-case hex digits,
+ * so the whole string must consist of exactly 24 hexadecimal characters.
+ *
+ * @param str - Candidate string to test
+ * @returns true when `str` is exactly 24 hex characters, false otherwise
+ */
+function isObjectId(str: string): boolean {
+  return /^[0-9a-fA-F]{24}$/.test(str);
+}
+
+/**
+ * Create a hash of the given string
+ *
+ * Computes the SHA-256 digest of `str`, hex-encodes it and keeps only the first 16 hex
+ * characters. The same input therefore always produces the same 16-character token.
+ *
+ * @param str - The string to hash
+ * @returns The first 16 hex characters of the SHA-256 digest of `str`
+ *
+ * NOTE(review): no salt or key is mixed in, so a guessed input can be checked by hashing it
+ * the same way - confirm this is acceptable for the intended anonymization level.
+ */
+function hashString(str: string): string {
+  return createHash('sha256').update(str).digest('hex').substring(0, 16);
+}
+
+/**
+ * Anonymize URL path by hashing non-ObjectId paths
+ *
+ * Leading and trailing slashes are stripped before inspection. What remains is hashed as a
+ * single string (interior slashes included), so a multi-segment path collapses into one
+ * `[HASHED:...]` token rather than one token per segment.
+ *
+ * The input is returned unchanged when:
+ * - nothing remains after stripping slashes (for example the root path), or
+ * - the stripped path is exactly an ObjectId, or
+ * - an exception is thrown while processing (a warning is logged in that case).
+ *
+ * @param urlPath - The URL path to anonymize
+ * @returns Anonymized URL path, or `urlPath` itself when it was left untouched
+ */
+function anonymizeUrlPath(urlPath: string): string {
+  try {
+    // Strip every leading and trailing slash; interior slashes are kept
+    const cleanPath = urlPath.replace(/^\/+|\/+$/g, '');
+
+    // Nothing but slashes (or an empty string): return the input as-is
+    if (!cleanPath) {
+      return urlPath;
+    }
+
+    // If the whole stripped path is an ObjectId, don't anonymize
+    if (isObjectId(cleanPath)) {
+      return urlPath;
+    }
+
+    // Hash the stripped path, then re-add at most one slash on each side where the input had any
+    const hashedPath = hashString(cleanPath);
+    const leadingSlash = urlPath.startsWith('/') ? '/' : '';
+    const trailingSlash = urlPath.endsWith('/') && urlPath.length > 1 ? '/' : ''; // a run of trailing slashes becomes a single one
+
+    return `${leadingSlash}[HASHED:${hashedPath}]${trailingSlash}`;
+  }
+  catch (error) {
+    logger.warn(`Failed to anonymize URL path: ${error}`);
+    return urlPath; // NOTE(review): on failure the original, un-hashed path is returned
+  }
+}
+
+/**
+ * Page access anonymization module for non-API requests
+ *
+ * `canHandle` decides whether a request URL looks like a page path, and `handle` produces a
+ * replacement value for the HTTP target attribute in which the path portion is hashed.
+ */
+export const pageAccessModule: AnonymizationModule = {
+  /**
+   * Check if this module can handle page access requests (non-API)
+   *
+   * The URL is parsed against a dummy `http://localhost` base so that path-only inputs are
+   * accepted; only the resulting pathname is inspected.
+   *
+   * @param url - Request URL to classify
+   * @returns false for the root path, for paths matching the static-resource checks, and when
+   *          the URL cannot be parsed; otherwise the result of `isCreatablePage` for the pathname
+   *
+   * NOTE(review): the static-resource checks are substring matches, and a `.` anywhere in the
+   * pathname is treated as a file extension, so page paths containing a dot are never handled
+   * by this module - confirm that is intended.
+   */
+  canHandle(url: string): boolean {
+    try {
+      const parsedUrl = new URL(url, 'http://localhost');
+      const path = parsedUrl.pathname;
+
+      // Exclude root path
+      if (path === '/') return false;
+
+      // Exclude static resources first (each check matches anywhere in the pathname)
+      if (path.includes('/static/')
+        || path.includes('/_next/')
+        || path.includes('/favicon')
+        || path.includes('/assets/')
+        || path.includes('.')) { // Exclude file extensions (images, css, js, etc.)
+        return false;
+      }
+
+      // Use GROWI's isCreatablePage logic to determine if this is a valid page path
+      // This excludes API endpoints, system paths, etc.
+      return isCreatablePage(path);
+    }
+    catch {
+      // If URL parsing fails, don't handle it
+      return false;
+    }
+  },
+
+  /**
+   * Handle anonymization for page access requests
+   *
+   * Parses the URL against the same dummy base as `canHandle`, anonymizes the pathname and
+   * re-attaches the original query string unchanged.
+   *
+   * @param request - Incoming request; not read by this implementation
+   * @param url - Request URL to anonymize
+   * @returns A single-entry record keyed by `ATTR_HTTP_TARGET` holding the anonymized path plus
+   *          query string, or null when the path was left unchanged or processing failed
+   *
+   * NOTE(review): the query string is copied through verbatim and the debug log line includes
+   * the original, un-anonymized URL - confirm both are acceptable for the privacy requirements.
+   */
+  handle(request: IncomingMessage, url: string): Record<string, string> | null {
+    try {
+      const parsedUrl = new URL(url, 'http://localhost');
+      const originalPath = parsedUrl.pathname;
+
+      // Anonymize the URL path
+      const anonymizedPath = anonymizeUrlPath(originalPath);
+
+      // Only return attributes if path was actually anonymized (an unchanged path yields null below)
+      if (anonymizedPath !== originalPath) {
+        const anonymizedUrl = anonymizedPath + parsedUrl.search;
+
+        logger.debug(`Anonymized page access URL: ${url} -> ${anonymizedUrl}`);
+
+        return {
+          [ATTR_HTTP_TARGET]: anonymizedUrl,
+        };
+      }
+
+      return null;
+    }
+    catch (error) {
+      logger.warn(`Failed to anonymize page access URL: ${error}`);
+      return null;
+    }
+  },
+};