Просмотр исходного кода

Enhance page access anonymization to handle user pages and exclude specific paths

Yuki Takei 9 месяцев назад
Родитель
Коммит
691c821481

+ 9 - 9
apps/app/src/features/opentelemetry/server/anonymization/handlers/page-access-handler.spec.ts

@@ -15,8 +15,11 @@ describe('pageAccessModule', () => {
       ${'assets'}                   | ${'/assets/image.png'}         | ${false}
       ${'Next.js resource'}         | ${'/_next/chunk.js'}           | ${false}
       ${'file with extension'}      | ${'/file.pdf'}                 | ${false}
-      ${'page path'}                | ${'/user/john/projects'}       | ${true}
-      ${'ObjectId path'}            | ${'/58a4569921a8424d00a1aa0e'} | ${true}
+      ${'Users top page'}           | ${'/user'}                     | ${false}
+      ${'Users homepage'}           | ${'/user/john'}                | ${true}
+      ${'Users page'}               | ${'/user/john/projects'}       | ${true}
+      ${'page path'}                | ${'/path/to/page'}             | ${true}
+      ${'ObjectId path'}            | ${'/58a4569921a8424d00a1aa0e'} | ${false}
       `('should return $expected for $description', ({ url, expected }) => {
       const result = pageAccessModule.canHandle(url);
       expect(result).toBe(expected);
@@ -29,15 +32,12 @@ describe('pageAccessModule', () => {
         description                     | url                                 | expectedPath
         ${'root path'}                  | ${'/'}                              | ${null}
         ${'empty path'}                 | ${''}                               | ${null}
-        ${'ObjectId path'}              | ${'/58a4569921a8424d00a1aa0e'}      | ${null}
-        ${'regular page path'}          | ${'/user/john/projects'}            | ${'/[HASHED:352fb6c6866e6ae8]'}
+        ${'Users homepage'}             | ${'/user/john'}                     | ${'/user/[USERNAME_HASHED:96d9632f363564cc]'}
+        ${'regular page path'}          | ${'/user/john/projects'}            | ${'/user/[USERNAME_HASHED:96d9632f363564cc]/[HASHED:2577c0f557b2e4b5]'}
         ${'Japanese page path'}         | ${'/path/to/ページ'}                | ${'/[HASHED:691184f7b886e43b]'}
-        ${'path with trailing slash'}   | ${'/user/john/'}                    | ${'/[HASHED:f15cf84e6a68ab30]/'}
-        ${'path with multiple slashes'} | ${'///user///john///'}              | ${'/[HASHED:96d9632f363564cc]/'}
         ${'complex path'}               | ${'/wiki/project/documentation'}    | ${'/[HASHED:22ca1a8b9f281349]'}
         ${'path with special chars'}    | ${'/user-name_123/project!'}        | ${'/[HASHED:7aa6a8f4468baa96]'}
-        ${'mixed case ObjectId'}        | ${'/58A4569921a8424d00a1aa0E'}      | ${null}
-        ${'lowercase ObjectId'}         | ${'/58a4569921a8424d00a1aa0e'}      | ${null}
+        ${'ObjectId path'}              | ${'/58a4569921a8424d00a1aa0e'}      | ${null}
       `('should handle $description', ({ url, expectedPath }) => {
         const mockRequest = {} as IncomingMessage;
         const result = pageAccessModule.handle(mockRequest, url);
@@ -59,7 +59,7 @@ describe('pageAccessModule', () => {
       const result = pageAccessModule.handle(mockRequest, url);
 
       expect(result).toEqual({
-        'http.target': '/[HASHED:f15cf84e6a68ab30]?tab=projects&sort=date',
+        'http.target': '/user/[USERNAME_HASHED:96d9632f363564cc]?tab=projects&sort=date',
       });
     });
 

+ 49 - 14
apps/app/src/features/opentelemetry/server/anonymization/handlers/page-access-handler.ts

@@ -1,7 +1,14 @@
 import { createHash } from 'crypto';
 import type { IncomingMessage } from 'http';
 
-import { isCreatablePage } from '@growi/core/dist/utils/page-path-utils';
+import {
+  isCreatablePage,
+  isUsersHomepage,
+  isUserPage,
+  isUsersTopPage,
+  isPermalink,
+  getUsernameByPath,
+} from '@growi/core/dist/utils/page-path-utils';
 import { diag } from '@opentelemetry/api';
 
 import { ATTR_HTTP_TARGET } from '../../semconv';
@@ -9,13 +16,6 @@ import type { AnonymizationModule } from '../interfaces/anonymization-module';
 
 const logger = diag.createComponentLogger({ namespace: 'growi:anonymization:page-access-handler' });
 
-/**
- * Check if a string is a MongoDB ObjectId (24 hex characters)
- */
-function isObjectId(str: string): boolean {
-  return /^[0-9a-fA-F]{24}$/.test(str);
-}
-
 /**
  * Create a hash of the given string
  */
@@ -30,7 +30,38 @@ function hashString(str: string): string {
  */
 function anonymizeUrlPath(urlPath: string): string {
   try {
-    // Remove leading/trailing slashes for processing
+    // If it's a permalink (ObjectId), don't anonymize
+    if (isPermalink(urlPath)) {
+      return urlPath;
+    }
+
+    // Handle user pages specially
+    if (isUserPage(urlPath)) {
+      const username = getUsernameByPath(urlPath);
+
+      if (isUsersHomepage(urlPath) && username) {
+        // For user homepage (/user/john), anonymize only the username
+        const hashedUsername = hashString(username);
+        return `/user/[USERNAME_HASHED:${hashedUsername}]`;
+      }
+
+      if (username) {
+        // For user sub-pages (/user/john/projects), anonymize username and remaining path separately
+        const hashedUsername = hashString(username);
+        const remainingPath = urlPath.replace(`/user/${username}`, '');
+
+        if (remainingPath) {
+          const cleanRemainingPath = remainingPath.replace(/^\/+|\/+$/g, '');
+          const hashedRemainingPath = hashString(cleanRemainingPath);
+          const leadingSlash = remainingPath.startsWith('/') ? '/' : '';
+          const trailingSlash = remainingPath.endsWith('/') && remainingPath.length > 1 ? '/' : '';
+
+          return `/user/[USERNAME_HASHED:${hashedUsername}]${leadingSlash}[HASHED:${hashedRemainingPath}]${trailingSlash}`;
+        }
+      }
+    }
+
+    // For regular pages, use the original logic
     const cleanPath = urlPath.replace(/^\/+|\/+$/g, '');
 
     // If empty path, return as-is
@@ -38,11 +69,6 @@ function anonymizeUrlPath(urlPath: string): string {
       return urlPath;
     }
 
-    // If it's an ObjectId, don't anonymize
-    if (isObjectId(cleanPath)) {
-      return urlPath;
-    }
-
     // Hash the path and return with original slash structure
     const hashedPath = hashString(cleanPath);
     const leadingSlash = urlPath.startsWith('/') ? '/' : '';
@@ -80,6 +106,15 @@ export const pageAccessModule: AnonymizationModule = {
         return false;
       }
 
+      // Exclude users top page (/user)
+      if (isUsersTopPage(path)) return false;
+
+      // Exclude permalink (ObjectId) paths
+      if (isPermalink(path)) return false;
+
+      // Handle user pages (including homepage and sub-pages)
+      if (isUserPage(path)) return true;
+
       // Use GROWI's isCreatablePage logic to determine if this is a valid page path
       // This excludes API endpoints, system paths, etc.
       return isCreatablePage(path);