Explorar el Código

fix(core): use PCRE-safe regex escaping for MongoDB queries with non-ASCII whitespace

Node.js 24's RegExp.escape() escapes non-ASCII whitespace (code points
>= U+0100, e.g. U+3000 IDEOGRAPHIC SPACE) into \uXXXX form. MongoDB's
PCRE2 engine does not support \u, so such patterns fail with error 51091,
breaking page creation and v5 page migration for paths containing those
characters.

Add escapeStringForMongoRegex() to @growi/core (escapes only regex
metacharacters and passes other characters through literally — equivalent
to escape-string-regexp v5, what GROWI used before the v7.5.0 refactor),
and use it at every call site where the escaped pattern is sent to MongoDB
(20 sites across page.ts, obsolete-page.js, page/index.ts, page-grant.ts,
users.js, openai.ts, generate-children-regexp.ts, remark-lsx list-pages).
In-process .test()/.replace() sites keep RegExp.escape().

Fixes #11235

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Yuki Takei hace 1 semana
padre
commit
bd28252c1a

+ 7 - 0
.changeset/fix-mongo-regex-fullwidth-space.md

@@ -0,0 +1,7 @@
+---
+"@growi/core": patch
+---
+
+Fix page operations and v5 page migration failing for page paths that contain non-ASCII whitespace (e.g. U+3000 IDEOGRAPHIC SPACE)
+
+Node.js 24's `RegExp.escape()` escapes non-ASCII whitespace (code points >= U+0100, such as U+3000) into `\uXXXX` form, which MongoDB's PCRE2 engine does not support (error 51091). Added `escapeStringForMongoRegex()`, which escapes only regex metacharacters and passes other characters through literally, and used it wherever the resulting pattern is sent to MongoDB.

+ 4 - 2
apps/app/src/features/openai/server/services/openai.ts

@@ -11,7 +11,7 @@ import {
   isPopulated,
   PageGrant,
 } from '@growi/core';
-import { deepEquals } from '@growi/core/dist/utils';
+import { deepEquals, escapeStringForMongoRegex } from '@growi/core/dist/utils';
 import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
 import createError from 'http-errors';
 import mongoose, { type HydratedDocument, type Types } from 'mongoose';
@@ -77,7 +77,9 @@ const convertPathPatternsToRegExp = (
   return pagePathPatterns.map((pagePathPattern) => {
     if (isGlobPatternPath(pagePathPattern)) {
       const trimedPagePathPattern = pagePathPattern.replace('/*', '');
-      const escapedPagePathPattern = RegExp.escape(trimedPagePathPattern);
+      const escapedPagePathPattern = escapeStringForMongoRegex(
+        trimedPagePathPattern,
+      );
       // https://regex101.com/r/x5KIZL/1
       return new RegExp(`^${escapedPagePathPattern}($|/)`);
     }

+ 2 - 1
apps/app/src/server/models/obsolete-page.js

@@ -1,5 +1,6 @@
 import { GroupType, Origin } from '@growi/core';
 import {
+  escapeStringForMongoRegex,
   pagePathUtils,
   pathUtils,
   templateChecker,
@@ -687,7 +688,7 @@ export const getPageSchema = (crowi) => {
     const regexpList = pathList.map((path) => {
       const pathWithTrailingSlash = pathUtils.addTrailingSlash(path);
       return new RegExp(
-        `^${RegExp.escape(pathWithTrailingSlash)}_{1,2}template$`,
+        `^${escapeStringForMongoRegex(pathWithTrailingSlash)}_{1,2}template$`,
       );
     });
 

+ 8 - 7
apps/app/src/server/models/page.ts

@@ -4,6 +4,7 @@ import type {
   IUserHasId,
 } from '@growi/core/dist/interfaces';
 import { getIdForRef, isPopulated } from '@growi/core/dist/interfaces';
+import { escapeStringForMongoRegex } from '@growi/core/dist/utils';
 import { hasSlash, isTopPage } from '@growi/core/dist/utils/page-path-utils';
 import {
   addTrailingSlash,
@@ -347,7 +348,7 @@ export class PageQueryBuilder {
     const pathNormalized = normalizePath(path);
     const pathWithTrailingSlash = addTrailingSlash(path);
 
-    const startsPattern = RegExp.escape(pathWithTrailingSlash);
+    const startsPattern = escapeStringForMongoRegex(pathWithTrailingSlash);
 
     this.query = this.query.and({
       $or: [
@@ -372,7 +373,7 @@ export class PageQueryBuilder {
 
     const pathWithTrailingSlash = addTrailingSlash(path);
 
-    const startsPattern = RegExp.escape(pathWithTrailingSlash);
+    const startsPattern = escapeStringForMongoRegex(pathWithTrailingSlash);
 
     this.query = this.query.and({ path: new RegExp(`^${startsPattern}`) });
 
@@ -408,7 +409,7 @@ export class PageQueryBuilder {
       return this;
     }
 
-    const startsPattern = RegExp.escape(path);
+    const startsPattern = escapeStringForMongoRegex(path);
 
     this.query = this.query.and({ path: new RegExp(`^${startsPattern}`) });
 
@@ -423,7 +424,7 @@ export class PageQueryBuilder {
       return this;
     }
 
-    const startsPattern = RegExp.escape(str);
+    const startsPattern = escapeStringForMongoRegex(str);
 
     this.query = this.query.and({
       path: new RegExp(`^(?!${startsPattern}).*$`),
@@ -439,7 +440,7 @@ export class PageQueryBuilder {
       return this;
     }
 
-    const startsPattern = RegExp.escape(path);
+    const startsPattern = escapeStringForMongoRegex(path);
 
     this.query = this.query.and({
       path: { $not: new RegExp(`^${startsPattern}(/|$)`) },
@@ -454,7 +455,7 @@ export class PageQueryBuilder {
       return this;
     }
 
-    const match = RegExp.escape(str);
+    const match = escapeStringForMongoRegex(str);
 
     this.query = this.query.and({ path: new RegExp(`^(?=.*${match}).*$`) });
 
@@ -467,7 +468,7 @@ export class PageQueryBuilder {
       return this;
     }
 
-    const match = RegExp.escape(str);
+    const match = escapeStringForMongoRegex(str);
 
     this.query = this.query.and({ path: new RegExp(`^(?!.*${match}).*$`) });
 

+ 2 - 1
apps/app/src/server/routes/apiv3/users.js

@@ -1,6 +1,7 @@
 import { SCOPE } from '@growi/core/dist/interfaces';
 import { ErrorV3 } from '@growi/core/dist/models';
 import { serializeUserSecurely } from '@growi/core/dist/models/serializers';
+import { escapeStringForMongoRegex } from '@growi/core/dist/utils';
 import { userHomepagePath } from '@growi/core/dist/utils/page-path-utils';
 import express from 'express';
 import { body, query } from 'express-validator';
@@ -340,7 +341,7 @@ module.exports = (crowi) => {
 
       // Search from input
       const searchText = req.query.searchText || '';
-      const searchWord = new RegExp(RegExp.escape(searchText));
+      const searchWord = new RegExp(escapeStringForMongoRegex(searchText));
       // Sort
       const { sort, sortOrder } = req.query;
       const sortOutput = {

+ 10 - 2
apps/app/src/server/service/page-grant.ts

@@ -5,7 +5,12 @@ import {
   type IGrantedGroup,
   PageGrant,
 } from '@growi/core';
-import { pagePathUtils, pageUtils, pathUtils } from '@growi/core/dist/utils';
+import {
+  escapeStringForMongoRegex,
+  pagePathUtils,
+  pageUtils,
+  pathUtils,
+} from '@growi/core/dist/utils';
 import mongoose, { type HydratedDocument } from 'mongoose';
 
 import type { ExternalGroupProviderType } from '~/features/external-user-group/interfaces/external-user-group';
@@ -595,7 +600,10 @@ class PageGrantService implements IPageGrantService {
     };
 
     const commonCondition = {
-      path: new RegExp(`^${RegExp.escape(addTrailingSlash(targetPath))}`, 'i'),
+      path: new RegExp(
+        `^${escapeStringForMongoRegex(addTrailingSlash(targetPath))}`,
+        'i',
+      ),
       isEmpty: false,
     };
 

+ 12 - 5
apps/app/src/server/service/page/index.ts

@@ -17,7 +17,11 @@ import type {
   Ref,
 } from '@growi/core/dist/interfaces';
 import { PageGrant } from '@growi/core/dist/interfaces';
-import { pagePathUtils, pathUtils } from '@growi/core/dist/utils';
+import {
+  escapeStringForMongoRegex,
+  pagePathUtils,
+  pathUtils,
+} from '@growi/core/dist/utils';
 import type EventEmitter from 'events';
 import type { Cursor, HydratedDocument } from 'mongoose';
 import mongoose from 'mongoose';
@@ -3961,7 +3965,8 @@ class PageService implements IPageService {
     const ancestorPaths = paths.flatMap((p) => collectAncestorPaths(p, []));
     // targets' descendants
     const pathAndRegExpsToNormalize: (RegExp | string)[] = paths.map(
-      (p) => new RegExp(`^${RegExp.escape(addTrailingSlash(p))}`, 'i'),
+      (p) =>
+        new RegExp(`^${escapeStringForMongoRegex(addTrailingSlash(p))}`, 'i'),
     );
     // include targets' path
     pathAndRegExpsToNormalize.push(...paths);
@@ -4172,7 +4177,7 @@ class PageService implements IPageService {
           const parentId = parent._id;
 
           // Build filter
-          const parentPathEscaped = RegExp.escape(
+          const parentPathEscaped = escapeStringForMongoRegex(
             parent.path === '/' ? '' : parent.path,
           ); // adjust the path for RegExp
           const filter: any = {
@@ -5138,7 +5143,9 @@ class PageService implements IPageService {
     const wasOnTree = exPage.parent != null || isTopPage(exPage.path);
     const shouldBeOnTree = currentPage.grant !== PageGrant.GRANT_RESTRICTED;
     const isChildrenExist = await Page.count({
-      path: new RegExp(`^${RegExp.escape(addTrailingSlash(currentPage.path))}`),
+      path: new RegExp(
+        `^${escapeStringForMongoRegex(addTrailingSlash(currentPage.path))}`,
+      ),
       parent: { $ne: null },
     });
 
@@ -5270,7 +5277,7 @@ class PageService implements IPageService {
     const shouldBeOnTree = grant !== PageGrant.GRANT_RESTRICTED;
     const isChildrenExist = await Page.count({
       path: new RegExp(
-        `^${RegExp.escape(addTrailingSlash(clonedPageData.path))}`,
+        `^${escapeStringForMongoRegex(addTrailingSlash(clonedPageData.path))}`,
       ),
       parent: { $ne: null },
     });

+ 61 - 0
packages/core/src/utils/escape-string-for-regex.spec.ts

@@ -0,0 +1,61 @@
+import { describe, expect, test } from 'vitest';
+
+import { escapeStringForMongoRegex } from './escape-string-for-regex';
+
+describe('escapeStringForMongoRegex', () => {
+  test('escapes regex metacharacters', () => {
+    expect(escapeStringForMongoRegex('a.b*c+d?e')).toBe('a\\.b\\*c\\+d\\?e');
+    expect(escapeStringForMongoRegex('(group)[set]{n}')).toBe(
+      '\\(group\\)\\[set\\]\\{n\\}',
+    );
+    expect(escapeStringForMongoRegex('^start$ | end\\')).toBe(
+      '\\^start\\$ \\| end\\\\',
+    );
+  });
+
+  test('escapes hyphen as \\x2d (escape-string-regexp v5 behavior)', () => {
+    expect(escapeStringForMongoRegex('a-b')).toBe('a\\x2db');
+  });
+
+  test('does NOT escape forward slash or ASCII space', () => {
+    // The .source getter still renders "/" as "\/", but the escaped string itself keeps "/" literal.
+    expect(escapeStringForMongoRegex('/parent/child')).toBe('/parent/child');
+    expect(escapeStringForMongoRegex('a b')).toBe('a b');
+  });
+
+  // Core property of the fix: unlike RegExp.escape(), this must NOT emit \uXXXX,
+  // because MongoDB's PCRE2 engine rejects \u (error 51091).
+  test('passes non-ASCII whitespace through literally (no \\u escape)', () => {
+    const ideographicSpace = '\u3000'; // full-width space
+    const escaped = escapeStringForMongoRegex(`/page${ideographicSpace}title`);
+    expect(escaped).toContain(ideographicSpace);
+    expect(escaped).not.toContain('\\u');
+  });
+
+  test.each([
+    ' ',
+    ' ',
+    ' ',
+    ' ',
+    ' ',
+    '\u2028',
+    '\u2029',
+    ' ',
+    ' ',
+    ' ',
+  ])('does not emit \\u for whitespace char %j', (ws) => {
+    expect(escapeStringForMongoRegex(`x${ws}y`)).not.toContain('\\u');
+  });
+
+  test('produces a pattern that literally matches the original string', () => {
+    for (const s of [
+      '/parent/全角 space', // U+3000
+      '/a.b+c?(d)[e]',
+      '/path-with-hyphen',
+      '/nbsp here',
+    ]) {
+      const re = new RegExp(`^${escapeStringForMongoRegex(s)}$`);
+      expect(re.test(s)).toBe(true);
+    }
+  });
+});

+ 22 - 0
packages/core/src/utils/escape-string-for-regex.ts

@@ -0,0 +1,22 @@
+/**
+ * Escape a string for safe use inside a regular expression that is sent to MongoDB
+ * (`$regex` / `new RegExp(...)` used in a query). MongoDB evaluates regular expressions
+ * with the PCRE2 engine.
+ *
+ * Why not `RegExp.escape()`:
+ *   Node.js 24's built-in `RegExp.escape()` escapes non-ASCII whitespace
+ *   (code points >= U+0100, e.g. U+3000 IDEOGRAPHIC SPACE) into `\uXXXX` form.
+ *   PCRE2 does not support `\u`, so such a pattern makes MongoDB throw
+ *   "Regular expression is invalid: PCRE2 does not support ... \u" (error 51091).
+ *
+ * This helper backslash-escapes only `| \ { } ( ) [ ] ^ $ + * ? .`, rewrites `-` as `\x2d`,
+ * and passes every other character through literally — behaviourally identical to
+ * `escape-string-regexp` v5, which is what GROWI used before the v7.5.0 refactor.
+ * Use this (not `RegExp.escape`) whenever the resulting pattern is sent to MongoDB.
+ * For in-process JS regex (`.test()` / `.replace()`), `RegExp.escape` is fine.
+ * @param str - Raw text that should be matched literally.
+ * @returns The escaped pattern fragment; escaping never introduces a `\uXXXX` sequence.
+ */
+export const escapeStringForMongoRegex = (str: string): string => {
+  return str.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&').replace(/-/g, '\\x2d');
+};

+ 1 - 0
packages/core/src/utils/index.ts

@@ -4,6 +4,7 @@ import * as _envUtils from './env-utils';
 export const envUtils = _envUtils;
 
 export * from './browser-utils';
+export * from './escape-string-for-regex';
 export * from './global-event-target';
 export * from './growi-theme-metadata';
 export * as deepEquals from './is-deep-equals';

+ 12 - 2
packages/core/src/utils/page-path-utils/generate-children-regexp.spec.ts

@@ -17,8 +17,10 @@ describe('generateChildrenRegExp', () => {
       invalidPaths: ['/parent', '/parent/child/grandchild', '/other/path'],
     },
     {
+      // escapeStringForMongoRegex does not escape ASCII space (it is PCRE-safe as-is),
+      // unlike RegExp.escape which would emit \x20.
       path: '/parent (with brackets)',
-      expected: '^\\/parent\\x20\\(with\\x20brackets\\)(\\/[^/]+)\\/?$',
+      expected: '^\\/parent \\(with brackets\\)(\\/[^/]+)\\/?$',
       validPaths: [
         '/parent (with brackets)/child',
         '/parent (with brackets)/test',
@@ -30,13 +32,21 @@ describe('generateChildrenRegExp', () => {
     },
     {
       path: '/parent[with square]',
-      expected: '^\\/parent\\[with\\x20square\\](\\/[^/]+)\\/?$',
+      expected: '^\\/parent\\[with square\\](\\/[^/]+)\\/?$',
       validPaths: ['/parent[with square]/child', '/parent[with square]/test'],
       invalidPaths: [
         '/parent[with square]',
         '/parent[with square]/child/grandchild',
       ],
     },
+    {
+      // Regression for #11235: a path containing U+3000 (full-width space) must NOT be
+      // escaped to \u3000 — MongoDB's PCRE2 rejects \u (error 51091). The char passes through literally.
+      path: '/親 ページ',
+      expected: '^\\/親 ページ(\\/[^/]+)\\/?$',
+      validPaths: ['/親 ページ/child', '/親 ページ/テスト'],
+      invalidPaths: ['/親 ページ', '/親 ページ/child/grandchild'],
+    },
     {
       path: '/parent*with+special?chars',
       expected: '^\\/parent\\*with\\+special\\?chars(\\/[^/]+)\\/?$',

+ 5 - 1
packages/core/src/utils/page-path-utils/generate-children-regexp.ts

@@ -1,3 +1,4 @@
+import { escapeStringForMongoRegex } from '../escape-string-for-regex';
 import { isTopPage } from './is-top-page';
 
 /**
@@ -10,5 +11,8 @@ export const generateChildrenRegExp = (path: string): RegExp => {
 
   // https://regex101.com/r/mrDJrx/1
   // ex. /parent/any_child OR /any_level1
-  return new RegExp(`^${RegExp.escape(path)}(\\/[^/]+)\\/?$`);
+  // NOTE: use escapeStringForMongoRegex (not RegExp.escape) because this pattern is sent to
+  // MongoDB ($regex). RegExp.escape would emit \uXXXX for non-ASCII whitespace (e.g. U+3000),
+  // which PCRE2 rejects (error 51091).
+  return new RegExp(`^${escapeStringForMongoRegex(path)}(\\/[^/]+)\\/?$`);
 };

+ 5 - 4
packages/remark-lsx/src/server/routes/list-pages/index.spec.ts

@@ -1,4 +1,5 @@
 import type { IPageHasId, IUser } from '@growi/core';
+import { escapeStringForMongoRegex } from '@growi/core/dist/utils';
 import type { Request, Response } from 'express';
 import createError from 'http-errors';
 import { mock } from 'vitest-mock-extended';
@@ -187,7 +188,7 @@ describe('listPages', () => {
       const pagePath = '/parent';
       const optionsFilter = '^child';
       const expectedRegex = new RegExp(
-        `^${RegExp.escape('/parent/')}${RegExp.escape('child')}`,
+        `^${escapeStringForMongoRegex('/parent/')}${escapeStringForMongoRegex('child')}`,
       );
 
       // when
@@ -202,7 +203,7 @@ describe('listPages', () => {
       const pagePath = '/parent';
       const optionsFilter = 'child';
       const expectedRegex = new RegExp(
-        `^${RegExp.escape('/parent/')}.*${RegExp.escape('child')}`,
+        `^${escapeStringForMongoRegex('/parent/')}.*${escapeStringForMongoRegex('child')}`,
       );
 
       // when
@@ -230,7 +231,7 @@ describe('listPages', () => {
       const pagePath = '/parent';
       const optionsFilter = 'child';
       const expectedRegex = new RegExp(
-        `^${RegExp.escape('/parent/')}.*${RegExp.escape('child')}`,
+        `^${escapeStringForMongoRegex('/parent/')}.*${escapeStringForMongoRegex('child')}`,
       );
 
       // when
@@ -320,7 +321,7 @@ describe('when excludedPaths is handled', () => {
 
     // check if the logic generates the correct regex: ^\/(user|tmp)(\/|$)
     const expectedRegex = new RegExp(
-      `^\\/(${RegExp.escape('user')}|${RegExp.escape('tmp')})(\\/|$)`,
+      `^\\/(${escapeStringForMongoRegex('user')}|${escapeStringForMongoRegex('tmp')})(\\/|$)`,
     );
     expect(queryMock.and).toHaveBeenCalledWith([
       {

+ 7 - 5
packages/remark-lsx/src/server/routes/list-pages/index.ts

@@ -1,6 +1,6 @@
 import type { IUser } from '@growi/core';
 import { OptionParser } from '@growi/core/dist/remark-plugins';
-import { pathUtils } from '@growi/core/dist/utils';
+import { escapeStringForMongoRegex, pathUtils } from '@growi/core/dist/utils';
 import { loggerFactory } from '@growi/logger';
 import type { Request, Response } from 'express';
 import createError, { isHttpError } from 'http-errors';
@@ -33,16 +33,18 @@ export function addFilterCondition(
     );
   }
 
-  const pagePathForRegexp = RegExp.escape(addTrailingSlash(pagePath));
+  const pagePathForRegexp = escapeStringForMongoRegex(
+    addTrailingSlash(pagePath),
+  );
 
   let filterPath: RegExp;
   try {
     if (optionsFilter.charAt(0) === '^') {
       // move '^' to the first of path
-      const escapedFilter = RegExp.escape(optionsFilter.slice(1));
+      const escapedFilter = escapeStringForMongoRegex(optionsFilter.slice(1));
       filterPath = new RegExp(`^${pagePathForRegexp}${escapedFilter}`);
     } else {
-      const escapedFilter = RegExp.escape(optionsFilter);
+      const escapedFilter = escapeStringForMongoRegex(optionsFilter);
       filterPath = new RegExp(`^${pagePathForRegexp}.*${escapedFilter}`);
     }
   } catch (err) {
@@ -101,7 +103,7 @@ export const listPages = ({
       if (excludedPaths.length > 0) {
         const escapedPaths = excludedPaths.map((p) => {
           const cleanPath = p.startsWith('/') ? p.substring(1) : p;
-          return RegExp.escape(cleanPath);
+          return escapeStringForMongoRegex(cleanPath);
         });
 
         const regex = new RegExp(`^\\/(${escapedPaths.join('|')})(\\/|$)`);