Просмотр исходного кода

feat(suggest-path): add content analysis with keyword extraction and flow/stock classification

Implement analyzeContent function (1st AI call) that performs both keyword
extraction and flow/stock information type classification in a single call,
replacing the keyword-only extraction for the Phase 2 pipeline.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
VANELLOPE\tomoyuki-t 1 месяц назад
Родитель
Commit
ea92e6d878

+ 1 - 1
.kiro/specs/suggest-path/tasks.md

@@ -37,7 +37,7 @@
   - Include unit tests for: direct parent found, ancestor found at various depths, no ancestor found (safe default), root-level paths, paths with trailing slashes
   - Include unit tests for: direct parent found, ancestor found at various depths, no ancestor found (safe default), root-level paths, paths with trailing slashes
   - _Requirements: 7.1, 7.2_
   - _Requirements: 7.1, 7.2_
 
 
-- [ ] 3. (P) Content analysis via GROWI AI (1st AI call)
+- [x] 3. (P) Content analysis via GROWI AI (1st AI call)
   - Implement content analysis that delegates to GROWI AI for a single AI call performing both keyword extraction and flow/stock information type classification
   - Implement content analysis that delegates to GROWI AI for a single AI call performing both keyword extraction and flow/stock information type classification
   - Extract 1-5 keywords from the content, prioritizing proper nouns and technical terms over generic words
   - Extract 1-5 keywords from the content, prioritizing proper nouns and technical terms over generic words
   - Classify the content as either flow information (time-bound: meeting notes, diaries, reports) or stock information (reference: documentation, knowledge base articles)
   - Classify the content as either flow information (time-bound: meeting notes, diaries, reports) or stock information (reference: documentation, knowledge base articles)

+ 377 - 0
apps/app/src/server/routes/apiv3/ai-tools/analyze-content.spec.ts

@@ -0,0 +1,377 @@
+import { analyzeContent } from './analyze-content';
+import type { ContentAnalysis } from './suggest-path-types';
+
+// Mock handles shared by the vi.mock factories below and by the tests.
+// Created through vi.hoisted so they can be referenced inside those factories.
+const mocks = vi.hoisted(() => ({
+  chatCompletionMock: vi.fn(),
+  getClientMock: vi.fn(),
+  configManagerMock: { getConfig: vi.fn() },
+}));
+
+// Stand-in for the client delegator module: getClient is a controllable mock,
+// and isStreamResponse reports true for any non-null object that exposes
+// Symbol.asyncIterator.
+vi.mock('~/features/openai/server/services/client-delegator', () => {
+  const isStreamResponse = (result: unknown) =>
+    typeof result === 'object' &&
+    result !== null &&
+    Symbol.asyncIterator in (result as Record<symbol, unknown>);
+
+  return { getClient: mocks.getClientMock, isStreamResponse };
+});
+
+// Stand-in for the config manager module so each test controls getConfig.
+vi.mock('~/server/service/config-manager', () => {
+  return { configManager: mocks.configManagerMock };
+});
+
+describe('analyzeContent', () => {
+  /** Resolve the mocked chatCompletion with one choice whose message carries `content`. */
+  const mockCompletionContent = (content: string | null): void => {
+    mocks.chatCompletionMock.mockResolvedValue({
+      choices: [{ message: { content } }],
+    });
+  };
+
+  /** Resolve the mocked chatCompletion with `payload` serialized as the message content. */
+  const mockCompletionJson = (payload: unknown): void => {
+    mockCompletionContent(JSON.stringify(payload));
+  };
+
+  /** Matcher for a chatCompletion request whose system message contains `text`. */
+  const requestWithSystemPromptContaining = (text: string) =>
+    expect.objectContaining({
+      messages: expect.arrayContaining([
+        expect.objectContaining({
+          role: 'system',
+          content: expect.stringContaining(text),
+        }),
+      ]),
+    });
+
+  beforeEach(() => {
+    // resetAllMocks clears implementations too, so the defaults are re-installed each time.
+    vi.resetAllMocks();
+    mocks.configManagerMock.getConfig.mockImplementation((key: string) => {
+      if (key === 'openai:serviceType') return 'openai';
+      return undefined;
+    });
+    mocks.getClientMock.mockReturnValue({
+      chatCompletion: mocks.chatCompletionMock,
+    });
+  });
+
+  describe('successful keyword extraction with quality verification', () => {
+    it('should return keywords and informationType from AI response', async () => {
+      mockCompletionJson({
+        keywords: ['React', 'hooks', 'useState'],
+        informationType: 'stock',
+      });
+
+      const result = await analyzeContent(
+        'A guide to React hooks and useState',
+      );
+
+      expect(result).toEqual({
+        keywords: ['React', 'hooks', 'useState'],
+        informationType: 'stock',
+      } satisfies ContentAnalysis);
+    });
+
+    it('should extract 1-5 keywords prioritizing proper nouns and technical terms', async () => {
+      mockCompletionJson({
+        keywords: [
+          'TypeScript',
+          'generics',
+          'mapped types',
+          'conditional types',
+        ],
+        informationType: 'stock',
+      });
+
+      const result = await analyzeContent(
+        'TypeScript generics and advanced type system features',
+      );
+
+      expect(result.keywords.length).toBeGreaterThanOrEqual(1);
+      expect(result.keywords.length).toBeLessThanOrEqual(5);
+    });
+
+    it('should pass content body to chatCompletion as user message', async () => {
+      mockCompletionJson({ keywords: ['MongoDB'], informationType: 'stock' });
+
+      await analyzeContent('MongoDB aggregation pipeline');
+
+      expect(mocks.chatCompletionMock).toHaveBeenCalledWith(
+        expect.objectContaining({
+          messages: expect.arrayContaining([
+            expect.objectContaining({
+              role: 'user',
+              content: 'MongoDB aggregation pipeline',
+            }),
+          ]),
+        }),
+      );
+    });
+
+    it('should use a system prompt instructing both keyword extraction and flow/stock classification', async () => {
+      mockCompletionJson({ keywords: ['Next.js'], informationType: 'stock' });
+
+      await analyzeContent('Next.js routing');
+
+      // Assert on the prompt text itself, not merely on the presence of a
+      // system message: it must name both output fields and both classes.
+      for (const term of ['keywords', 'informationType', 'flow', 'stock']) {
+        expect(mocks.chatCompletionMock).toHaveBeenCalledWith(
+          requestWithSystemPromptContaining(term),
+        );
+      }
+    });
+
+    it('should not use streaming mode', async () => {
+      mockCompletionJson({ keywords: ['keyword'], informationType: 'stock' });
+
+      await analyzeContent('test content');
+
+      expect(mocks.chatCompletionMock).toHaveBeenCalledWith(
+        expect.not.objectContaining({
+          stream: true,
+        }),
+      );
+    });
+  });
+
+  describe('correct flow/stock classification for representative content samples', () => {
+    it('should classify meeting notes as flow', async () => {
+      mockCompletionJson({
+        keywords: ['sprint', 'retrospective', 'action items'],
+        informationType: 'flow',
+      });
+
+      const result = await analyzeContent(
+        '2025/05/01 Sprint retrospective meeting notes. Action items discussed.',
+      );
+
+      expect(result.informationType).toBe('flow');
+    });
+
+    it('should classify documentation as stock', async () => {
+      mockCompletionJson({
+        keywords: ['API', 'authentication', 'JWT'],
+        informationType: 'stock',
+      });
+
+      const result = await analyzeContent(
+        'API Authentication Guide: How to use JWT tokens for secure access.',
+      );
+
+      expect(result.informationType).toBe('stock');
+    });
+  });
+
+  describe('edge cases', () => {
+    it('should handle very short content', async () => {
+      mockCompletionJson({ keywords: ['hello'], informationType: 'stock' });
+
+      const result = await analyzeContent('hello');
+
+      expect(result.keywords).toEqual(['hello']);
+      expect(result.informationType).toBe('stock');
+    });
+
+    it('should handle content with ambiguous information type', async () => {
+      mockCompletionJson({
+        keywords: ['Docker', 'deployment'],
+        informationType: 'stock',
+      });
+
+      const result = await analyzeContent('Docker deployment notes');
+
+      expect(result.keywords.length).toBeGreaterThanOrEqual(1);
+      expect(['flow', 'stock']).toContain(result.informationType);
+    });
+  });
+
+  describe('failure propagation', () => {
+    it('should throw when chatCompletion rejects', async () => {
+      mocks.chatCompletionMock.mockRejectedValue(new Error('API error'));
+
+      await expect(analyzeContent('test')).rejects.toThrow('API error');
+    });
+
+    it('should throw when AI returns invalid JSON', async () => {
+      mockCompletionContent('not valid json');
+
+      await expect(analyzeContent('test')).rejects.toThrow();
+    });
+
+    // Well-formed JSON whose shape does not match ContentAnalysis.
+    it.each([
+      {
+        name: 'AI returns JSON without keywords field',
+        payload: { informationType: 'stock' },
+      },
+      {
+        name: 'AI returns JSON without informationType field',
+        payload: { keywords: ['test'] },
+      },
+      {
+        name: 'AI returns invalid informationType value',
+        payload: { keywords: ['test'], informationType: 'invalid' },
+      },
+      {
+        name: 'keywords is not an array',
+        payload: { keywords: 'not-an-array', informationType: 'stock' },
+      },
+      {
+        name: 'keywords array is empty',
+        payload: { keywords: [], informationType: 'stock' },
+      },
+    ])('should throw when $name', async ({ payload }) => {
+      mockCompletionJson(payload);
+
+      await expect(analyzeContent('test')).rejects.toThrow();
+    });
+
+    it('should throw when choices array is empty', async () => {
+      mocks.chatCompletionMock.mockResolvedValue({
+        choices: [],
+      });
+
+      await expect(analyzeContent('test')).rejects.toThrow();
+    });
+
+    it('should throw when message content is null', async () => {
+      mockCompletionContent(null);
+
+      await expect(analyzeContent('test')).rejects.toThrow();
+    });
+
+    it('should throw on streaming response', async () => {
+      // Any object exposing Symbol.asyncIterator is treated as a stream by the mocked isStreamResponse.
+      const streamMock = {
+        [Symbol.asyncIterator]: () => ({}),
+      };
+      mocks.chatCompletionMock.mockResolvedValue(streamMock);
+
+      await expect(analyzeContent('test')).rejects.toThrow();
+    });
+  });
+});

+ 85 - 0
apps/app/src/server/routes/apiv3/ai-tools/analyze-content.ts

@@ -0,0 +1,85 @@
+import type { OpenaiServiceType } from '~/features/openai/interfaces/ai';
+import { instructionsForInformationTypes } from '~/features/openai/server/services/assistant/instructions/commons';
+import {
+  getClient,
+  isStreamResponse,
+} from '~/features/openai/server/services/client-delegator';
+import { configManager } from '~/server/service/config-manager';
+
+import type { ContentAnalysis, InformationType } from './suggest-path-types';
+
+/** Every value accepted for the `informationType` field of the AI response. */
+const VALID_INFORMATION_TYPES: ReadonlyArray<InformationType> = [
+  'flow',
+  'stock',
+];
+
+/** Opening part of the prompt: the task and the two fields expected in the JSON reply. */
+const PROMPT_TASK = [
+  'You are a content analysis assistant. Analyze the following content and return a JSON object with two fields:\n',
+  '1. "keywords": An array of 1 to 5 search keywords extracted from the content. ',
+  'Prioritize proper nouns and technical terms over generic or common words.\n',
+  '2. "informationType": Classify the content as either "flow" or "stock".\n\n',
+].join('');
+
+/** Closing part of the prompt: output-format constraint plus one example reply. */
+const PROMPT_OUTPUT_FORMAT = [
+  'Return only the JSON object, no other text.\n',
+  'Example: {"keywords": ["React", "useState", "hooks"], "informationType": "stock"}',
+].join('');
+
+/**
+ * System message for the content analysis call. It asks for keyword
+ * extraction and flow/stock classification in one JSON reply and embeds the
+ * shared information-type instructions as the classification reference.
+ */
+const SYSTEM_PROMPT = [
+  PROMPT_TASK,
+  '## Classification Reference\n',
+  instructionsForInformationTypes,
+  '\n\n',
+  PROMPT_OUTPUT_FORMAT,
+].join('');
+
+/**
+ * Runtime type guard for the JSON value parsed from the AI response.
+ *
+ * Accepts only a non-null object whose `keywords` is a non-empty array made up
+ * entirely of strings and whose `informationType` is one of
+ * VALID_INFORMATION_TYPES.
+ *
+ * @param parsed - Value of unknown shape, typically the result of JSON.parse.
+ * @returns true when `parsed` can safely be treated as a ContentAnalysis.
+ */
+const isValidContentAnalysis = (parsed: unknown): parsed is ContentAnalysis => {
+  if (parsed == null || typeof parsed !== 'object') {
+    return false;
+  }
+
+  const obj = parsed as Record<string, unknown>;
+
+  if (!Array.isArray(obj.keywords) || obj.keywords.length === 0) {
+    return false;
+  }
+
+  // The predicate promises string[]; without this check a reply such as
+  // {"keywords": [1, null]} would be handed to callers typed as strings.
+  if (!obj.keywords.every((keyword) => typeof keyword === 'string')) {
+    return false;
+  }
+
+  if (
+    typeof obj.informationType !== 'string' ||
+    !VALID_INFORMATION_TYPES.includes(obj.informationType as InformationType)
+  ) {
+    return false;
+  }
+
+  return true;
+};
+
+/**
+ * Perform the single AI call that extracts search keywords from `body` and
+ * classifies it as flow or stock information.
+ *
+ * @param body - Content text, sent to the model as the user message.
+ * @returns The parsed and validated analysis.
+ * @throws Error when the client returns a stream, no choice, no message
+ *   content, or a payload that fails validation. Errors raised by JSON.parse
+ *   and by chatCompletion itself propagate unchanged.
+ */
+export const analyzeContent = async (
+  body: string,
+): Promise<ContentAnalysis> => {
+  const serviceType = configManager.getConfig(
+    'openai:serviceType',
+  ) as OpenaiServiceType;
+  const aiClient = getClient({ openaiServiceType: serviceType });
+
+  const response = await aiClient.chatCompletion({
+    model: 'gpt-4.1-nano',
+    messages: [
+      { role: 'system', content: SYSTEM_PROMPT },
+      { role: 'user', content: body },
+    ],
+  });
+
+  // No stream option is passed in the request above, so a stream here is a contract violation.
+  if (isStreamResponse(response)) {
+    throw new Error('Unexpected streaming response from chatCompletion');
+  }
+
+  const firstChoice = response.choices[0];
+  if (firstChoice == null) {
+    throw new Error('No choices returned from chatCompletion');
+  }
+
+  const rawText = firstChoice.message.content;
+  if (rawText == null) {
+    throw new Error('No content returned from chatCompletion');
+  }
+
+  const analysis: unknown = JSON.parse(rawText);
+  if (isValidContentAnalysis(analysis)) {
+    return analysis;
+  }
+
+  throw new Error(
+    'Invalid content analysis response: expected { keywords: string[], informationType: "flow" | "stock" }',
+  );
+};

+ 7 - 0
apps/app/src/server/routes/apiv3/ai-tools/suggest-path-types.ts

@@ -15,6 +15,13 @@ export type PathSuggestion = {
   grant: number;
   grant: number;
 };
 };
 
 
+/** The two classes a piece of content can be assigned by content analysis. */
+export type InformationType = 'flow' | 'stock';
+
+/** Outcome of content analysis: the extracted keywords and the information type. */
+export type ContentAnalysis = {
+  /** Keywords extracted from the content. */
+  keywords: Array<string>;
+  /** Whether the content was classified as flow or stock information. */
+  informationType: InformationType;
+};
+
 export type SuggestPathResponse = {
 export type SuggestPathResponse = {
   suggestions: PathSuggestion[];
   suggestions: PathSuggestion[];
 };
 };