Просмотр исходного кода

add test for ClientFuzzyMatcher

Yuki Takei 9 месяцев назад
Родитель
Сommit
5420e81573

+ 532 - 0
apps/app/src/features/openai/client/services/editor-assistant/fuzzy-matching.spec.ts

@@ -0,0 +1,532 @@
+import {
+  describe, test, expect, beforeEach,
+} from 'vitest';
+
+import type { SearchContext } from '../../../interfaces/editor-assistant/types';
+
+import {
+  ClientFuzzyMatcher,
+  calculateSimilarity,
+  splitLines,
+  joinLines,
+  measurePerformance,
+} from './fuzzy-matching';
+
+// Test utilities
+function createTestContent(): string {
+  return `function example() {
+  console.log("hello world");
+  const value = 42;
+  return value;
+}
+
+class TestClass {
+  constructor() {
+    this.name = "test";
+  }
+
+  method() {
+    return this.name;
+  }
+}`;
+}
+
+function createLargeContent(lines = 1000): string {
+  const content: string[] = [];
+  for (let i = 1; i <= lines; i++) {
+    content.push(`Line ${i}: This is line number ${i} with some content`);
+  }
+  return content.join('\n');
+}
+
+function createMultiLineSearchContent(): string {
+  return `const data = {
+  key: "value",
+  number: 123
+};`;
+}
+
+describe('fuzzy-matching', () => {
+  describe('calculateSimilarity', () => {
+    test('should return 1.0 for exact matches', () => {
+      const similarity = calculateSimilarity('hello world', 'hello world');
+      expect(similarity).toBe(1.0);
+    });
+
+    test('should return 0 for empty search text', () => {
+      const similarity = calculateSimilarity('hello world', '');
+      expect(similarity).toBe(0);
+    });
+
+    test('should return 1.0 for exact match after normalization', () => {
+      const similarity = calculateSimilarity('Hello  World', 'hello world');
+      expect(similarity).toBeGreaterThan(0.9); // Should be high due to normalization
+    });
+
+    test('should return appropriate similarity for similar strings', () => {
+      const similarity = calculateSimilarity('hello world', 'helo world'); // typo
+      expect(similarity).toBeGreaterThan(0.8);
+      expect(similarity).toBeLessThan(1.0);
+    });
+
+    test('should return low similarity for very different strings', () => {
+      const similarity = calculateSimilarity('hello world', 'completely different');
+      expect(similarity).toBeLessThan(0.3);
+    });
+
+    test('should handle length-based early filtering', () => {
+      const similarity = calculateSimilarity('a', 'very long string that is much longer');
+      expect(similarity).equals(0); // fixed to zero for early filtering for performance
+    });
+
+    test('should handle unicode characters', () => {
+      const similarity = calculateSimilarity('こんにちは世界', 'こんにちは世界');
+      expect(similarity).toBe(1.0);
+    });
+  });
+
+  describe('splitLines and joinLines', () => {
+    test('should split content into lines correctly', () => {
+      const content = 'line1\nline2\nline3';
+      const lines = splitLines(content);
+      expect(lines).toEqual(['line1', 'line2', 'line3']);
+    });
+
+    test('should handle different line endings', () => {
+      const contentCRLF = 'line1\r\nline2\r\nline3';
+      const lines = splitLines(contentCRLF);
+      expect(lines).toEqual(['line1', 'line2', 'line3']);
+    });
+
+    test('should join lines correctly', () => {
+      const lines = ['line1', 'line2', 'line3'];
+      const content = joinLines(lines);
+      expect(content).toBe('line1\nline2\nline3');
+    });
+
+    test('should handle empty lines', () => {
+      const lines = ['line1', '', 'line3'];
+      const content = joinLines(lines);
+      expect(content).toBe('line1\n\nline3');
+    });
+  });
+
+  describe('measurePerformance', () => {
+    test('should measure operation duration', () => {
+      const { result, duration } = measurePerformance(() => {
+        // Simulate some work
+        return 'test result';
+      }, 'test operation');
+
+      expect(result).toBe('test result');
+      expect(duration).toBeGreaterThanOrEqual(0);
+      expect(typeof duration).toBe('number');
+    });
+
+    test('should work with throwing operations', () => {
+      expect(() => {
+        measurePerformance(() => {
+          throw new Error('test error');
+        });
+      }).toThrow('test error');
+    });
+  });
+
+  describe('ClientFuzzyMatcher', () => {
+    let matcher: ClientFuzzyMatcher;
+
+    beforeEach(() => {
+      matcher = new ClientFuzzyMatcher(0.8, 1000);
+    });
+
+    describe('constructor and configuration', () => {
+      test('should create matcher with default values', () => {
+        const defaultMatcher = new ClientFuzzyMatcher();
+        expect(defaultMatcher.getThreshold()).toBe(0.85);
+        expect(defaultMatcher.getMaxSearchTime()).toBe(1000);
+      });
+
+      test('should create matcher with custom values', () => {
+        const customMatcher = new ClientFuzzyMatcher(0.9, 2000);
+        expect(customMatcher.getThreshold()).toBe(0.9);
+        expect(customMatcher.getMaxSearchTime()).toBe(2000);
+      });
+
+      test('should allow threshold updates', () => {
+        matcher.setThreshold(0.7);
+        expect(matcher.getThreshold()).toBe(0.7);
+      });
+
+      test('should throw error for invalid threshold', () => {
+        expect(() => matcher.setThreshold(-0.1)).toThrow('Threshold must be between 0.0 and 1.0');
+        expect(() => matcher.setThreshold(1.1)).toThrow('Threshold must be between 0.0 and 1.0');
+      });
+    });
+
+    describe('tryExactLineMatch', () => {
+      test('should match exact content at specified line', () => {
+        const content = createTestContent();
+        const result = matcher.tryExactLineMatch(content, 'console.log("hello world");', 2);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+        expect(result.matchedRange).toBeDefined();
+        expect(result.matchedRange?.startLine).toBe(2);
+      });
+
+      test('should fail for invalid line number', () => {
+        const content = createTestContent();
+        const result = matcher.tryExactLineMatch(content, 'test', 0);
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('Invalid line number');
+      });
+
+      test('should fail for line number beyond content', () => {
+        const content = createTestContent();
+        const lines = content.split('\n');
+        const result = matcher.tryExactLineMatch(content, 'test', lines.length + 1);
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('Invalid line number');
+      });
+
+      test('should handle multi-line search', () => {
+        const content = createTestContent();
+        const searchText = 'class TestClass {\n  constructor() {';
+        const result = matcher.tryExactLineMatch(content, searchText, 7);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBeGreaterThan(0.8);
+      });
+
+      test('should fail when not enough lines for multi-line search', () => {
+        const content = 'line1\nline2';
+        const searchText = 'line1\nline2\nline3';
+        const result = matcher.tryExactLineMatch(content, searchText, 1);
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('Not enough lines for search');
+      });
+
+      test('should handle fuzzy matching below threshold', () => {
+        const content = createTestContent();
+        const result = matcher.tryExactLineMatch(content, 'completely different text', 2);
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('Similarity below threshold');
+      });
+    });
+
+    describe('performBufferedSearch', () => {
+      test('should find match within buffer range', () => {
+        const content = createTestContent();
+        const result = matcher.performBufferedSearch(content, 'console.log("hello world");', 2, 5);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+
+      test('should find best match when multiple similar matches exist', () => {
+        const content = `console.log("test1");
+console.log("test2");
+console.log("hello world");
+console.log("test3");`;
+        const result = matcher.performBufferedSearch(content, 'console.log("hello world");', 2, 2);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+
+      test('should return no match when nothing similar found', () => {
+        const content = createTestContent();
+        const result = matcher.performBufferedSearch(content, 'nonexistent function call', 2, 5);
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('No match found');
+      });
+
+      test('should handle edge case with buffer exceeding content bounds', () => {
+        const content = 'line1\nline2\nline3';
+        const result = matcher.performBufferedSearch(content, 'line2', 2, 100);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+    });
+
+    describe('performFullSearch', () => {
+      test('should find match anywhere in content', () => {
+        const content = createTestContent();
+        const result = matcher.performFullSearch(content, 'return value;');
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+
+      test('should find best match among multiple candidates', () => {
+        const content = `return false;
+return true;
+return value;
+return null;`;
+        const result = matcher.performFullSearch(content, 'return value;');
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+
+      test('should return no match when threshold not met', () => {
+        const content = createTestContent();
+        const result = matcher.performFullSearch(content, 'completely unrelated content here');
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('No match found');
+      });
+
+      test('should handle early exit for exact matches', () => {
+        const largeContent = createLargeContent(500);
+        const result = matcher.performFullSearch(largeContent, 'Line 10: This is line number 10 with some content');
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+    });
+
+    describe('findBestMatch', () => {
+      test('should return early for empty search text', () => {
+        const content = createTestContent();
+        const result = matcher.findBestMatch(content, '');
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('Empty search text');
+      });
+
+      test('should return early for whitespace-only search text', () => {
+        const content = createTestContent();
+        const result = matcher.findBestMatch(content, '   \n\t  ');
+
+        expect(result.success).toBe(false);
+        expect(result.error).toBe('Empty search text');
+      });
+
+      test('should use exact line match when preferredStartLine is provided', () => {
+        const content = createTestContent();
+        const context: SearchContext = { preferredStartLine: 2 };
+        const result = matcher.findBestMatch(content, 'console.log("hello world");', context);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+        expect(result.matchedRange?.startLine).toBe(2);
+      });
+
+      test('should fall back to buffered search when exact line match fails', () => {
+        const content = createTestContent();
+        const context: SearchContext = { preferredStartLine: 1, bufferLines: 10 };
+        const result = matcher.findBestMatch(content, 'console.log("hello world");', context);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+
+      test('should use full search when no preferredStartLine provided', () => {
+        const content = createTestContent();
+        const result = matcher.findBestMatch(content, 'return this.name;');
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+
+      test('should respect search context bounds', () => {
+        const content = createTestContent();
+        const context: SearchContext = {
+          startLine: 7,
+          endLine: 15,
+          bufferLines: 2,
+        };
+        const result = matcher.findBestMatch(content, 'constructor()', context);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBeGreaterThan(0.8);
+      });
+
+      test('should handle timeout protection', () => {
+        const timeoutMatcher = new ClientFuzzyMatcher(0.8, 1); // Very short timeout
+        const largeContent = createLargeContent(1000);
+
+        // This might timeout, but should not crash
+        const result = timeoutMatcher.findBestMatch(largeContent, 'some search text that might not exist');
+
+        // Should either succeed or fail gracefully
+        expect(typeof result.success).toBe('boolean');
+        if (result.searchTime !== undefined) {
+          expect(result.searchTime).toBeGreaterThanOrEqual(0);
+        }
+      });
+
+      test('should provide search time information', () => {
+        const content = createTestContent();
+        const result = matcher.findBestMatch(content, 'console.log("hello world");');
+
+        expect(result.searchTime).toBeGreaterThanOrEqual(0);
+        expect(typeof result.searchTime).toBe('number');
+      });
+    });
+
+    describe('edge cases and error handling', () => {
+      test('should handle empty content', () => {
+        const result = matcher.findBestMatch('', 'search text');
+
+        expect(result.success).toBe(false);
+        // May not have error field set depending on implementation path
+        expect(result.similarity).toBe(0);
+      });
+
+      test('should handle single line content', () => {
+        const result = matcher.findBestMatch('single line here', 'single line');
+
+        // Based on actual behavior: similarity is ~0.6875, below 0.85 threshold
+        expect(result.similarity).toBeGreaterThan(0.6);
+        expect(result.similarity).toBeLessThan(0.85);
+        expect(result.success).toBe(false);
+      });
+
+      test('should handle very long lines', () => {
+        const longLine = 'a'.repeat(10000);
+        const result = matcher.findBestMatch(longLine, 'a'.repeat(100));
+
+        // Based on actual behavior: similarity is ~0.01, far below threshold
+        expect(result.similarity).equal(0);
+        expect(result.similarity).toBeLessThan(0.85);
+        expect(result.success).toBe(false);
+      });
+
+      test('should handle unicode and special characters', () => {
+        const content = `function test() {
+  const message = "こんにちは世界 🌍";
+  console.log(message);
+}`;
+        const result = matcher.findBestMatch(content, 'const message = "こんにちは世界 🌍";');
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+
+      test('should handle content with no newlines', () => {
+        const content = 'no newlines in this content at all';
+        const result = matcher.findBestMatch(content, 'newlines');
+
+        // Single word search may not meet high threshold
+        expect(result.similarity).equal(0); // fixed to zero for early filtering for performance
+        if (result.similarity >= 0.85) {
+          expect(result.success).toBe(true);
+        }
+        else {
+          expect(result.success).toBe(false);
+        }
+      });
+
+      test('should handle multi-line search text', () => {
+        const content = createTestContent();
+        const multiLineSearch = createMultiLineSearchContent();
+
+        // Add the multi-line content to our test content
+        const extendedContent = `${content}\n\n${multiLineSearch}`;
+        const result = matcher.findBestMatch(extendedContent, multiLineSearch);
+
+        expect(result.success).toBe(true);
+        expect(result.similarity).toBe(1.0);
+      });
+    });
+
+    describe('performance characteristics', () => {
+      test('should handle large content efficiently', () => {
+        const largeContent = createLargeContent(2000);
+        const startTime = performance.now();
+
+        const result = matcher.findBestMatch(largeContent, 'Line 1500: This is line number 1500 with some content');
+
+        const duration = performance.now() - startTime;
+
+        expect(result.success).toBe(true);
+        expect(duration).toBeLessThan(5000); // Should complete within 5 seconds
+      });
+
+      test('should provide consistent results', () => {
+        const content = createTestContent();
+        const searchText = 'console.log("hello world");';
+
+        const result1 = matcher.findBestMatch(content, searchText);
+        const result2 = matcher.findBestMatch(content, searchText);
+
+        expect(result1.success).toBe(result2.success);
+        expect(result1.similarity).toBe(result2.similarity);
+      });
+
+      test('should handle multiple sequential searches', () => {
+        const content = createTestContent();
+        const searches = [
+          'function example()',
+          'console.log("hello world");',
+          'const value = 42;',
+          'return value;',
+          'class TestClass',
+        ];
+
+        for (const search of searches) {
+          const result = matcher.findBestMatch(content, search);
+          expect(result.success).toBe(true);
+          expect(result.similarity).toBeGreaterThan(0.85); // Match the updated threshold
+        }
+      });
+    });
+  });
+
+  describe('middle-out search algorithm', () => {
+    let matcher: ClientFuzzyMatcher;
+
+    beforeEach(() => {
+      matcher = new ClientFuzzyMatcher(0.8, 1000);
+    });
+
+    test('should find matches efficiently with middle-out strategy', () => {
+      const lines: string[] = [];
+      for (let i = 1; i <= 100; i++) {
+        lines.push(`Line ${i}`);
+      }
+      lines[49] = 'TARGET LINE'; // Insert target at middle
+      const content = lines.join('\n');
+
+      const context: SearchContext = {
+        preferredStartLine: 50, // Start search around middle
+        bufferLines: 10,
+      };
+
+      const result = matcher.findBestMatch(content, 'TARGET LINE', context);
+
+      expect(result.success).toBe(true);
+      expect(result.similarity).toBe(1.0);
+      expect(result.matchedRange?.startLine).toBe(50);
+    });
+
+    test('should expand search outward from preferred line', () => {
+      const lines: string[] = [];
+      for (let i = 1; i <= 50; i++) {
+        lines.push(`Line ${i}`);
+      }
+      // Place target away from preferred start line
+      lines[39] = 'TARGET LINE'; // Line 40
+      const content = lines.join('\n');
+
+      const context: SearchContext = {
+        preferredStartLine: 25,
+        bufferLines: 20,
+      };
+
+      const result = matcher.findBestMatch(content, 'TARGET LINE', context);
+
+      expect(result.success).toBe(true);
+      expect(result.similarity).toBe(1.0);
+      expect(result.matchedRange?.startLine).toBe(40);
+    });
+  });
+});

+ 7 - 16
apps/app/src/features/openai/client/services/editor-assistant/fuzzy-matching.ts

@@ -16,7 +16,7 @@ import { normalizeForBrowserFuzzyMatch } from './text-normalization';
 
 /**
  * Calculate similarity between two strings using Levenshtein distance
- * Optimized for browser performance with early exit strategies
+ * Compatible with roo-code's similarity calculation
  */
 export function calculateSimilarity(original: string, search: string): number {
   // Empty searches are not supported
@@ -44,12 +44,13 @@ export function calculateSimilarity(original: string, search: string): number {
     return 1;
   }
 
-  // Calculate Levenshtein distance
+  // Calculate Levenshtein distance using fastest-levenshtein's distance function
   const dist = distance(normalizedOriginal, normalizedSearch);
-  const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length);
 
-  // Convert distance to similarity ratio
-  return Math.max(0, 1 - (dist / maxLength));
+  // Calculate similarity ratio (0 to 1, where 1 is an exact match)
+  // This matches roo-code's calculation method
+  const maxLength = Math.max(normalizedOriginal.length, normalizedSearch.length);
+  return 1 - dist / maxLength;
 }
 
 // -----------------------------------------------------------------------------
@@ -62,7 +63,7 @@ export class ClientFuzzyMatcher {
 
   private readonly maxSearchTime: number; // Browser performance limit
 
-  constructor(threshold = 0.8, maxSearchTimeMs = 1000) {
+  constructor(threshold = 0.85, maxSearchTimeMs = 1000) {
     this.threshold = threshold;
     this.maxSearchTime = maxSearchTimeMs;
   }
@@ -458,13 +459,3 @@ export function measurePerformance<T>(
 
   return { result, duration };
 }
-
-// -----------------------------------------------------------------------------
-// Export Default Instance
-// -----------------------------------------------------------------------------
-
-/**
- * Default client fuzzy matcher instance
- * Pre-configured for typical browser usage
- */
-export const defaultClientFuzzyMatcher = new ClientFuzzyMatcher(0.8, 1000);

+ 1 - 1
apps/app/src/features/openai/client/services/editor-assistant/search-replace-engine.ts

@@ -15,7 +15,7 @@ export function performSearchReplace(
   const content = yText.toString();
 
   // 1. Start search from the specified line
-  const fuzzyMatcher = new ClientFuzzyMatcher(0.8);
+  const fuzzyMatcher = new ClientFuzzyMatcher();
   const result = fuzzyMatcher.findBestMatch(
     content,
     searchText,