// index.test.js
  import { describe, it, expect } from 'vitest';
  import { splitMarkdownIntoChunks } from '../src/services/markdown-splitter';

  // Test suite for splitMarkdownIntoChunks(markdown, chunkSize). Every case
  // awaits the call, then inspects the resolved value's `length` and the
  // `pageContent` string of the items it contains.
  describe('splitMarkdownIntoChunks', () => {
    // Shared assertion: no chunk's pageContent may be longer than `limit`.
    const expectAllWithinLimit = (chunks, limit) => {
      for (const { pageContent } of chunks) {
        expect(pageContent.length).toBeLessThanOrEqual(limit);
      }
    };

    it('should split markdown into sections using the specified chunk size', async () => {
      const limit = 60;
      const input = `
# Heading 1
This is some content under heading 1.
# Heading 2
This is some content under heading 2.
# Heading 3
This is some content under heading 3.
# Heading 4
This is some content under heading 4.
`;

      const chunks = await splitMarkdownIntoChunks(input, limit);

      // The fixture is longer than the limit, so more than one chunk is required.
      expect(chunks.length).toBeGreaterThan(1);
      expectAllWithinLimit(chunks, limit);
    });

    it('should handle markdown without headers', async () => {
      const limit = 100;
      const input = `
This is some content without any headers. It should not be split unless it exceeds the chunk size.
`;

      const chunks = await splitMarkdownIntoChunks(input, limit);

      // Short, header-free content is expected to come back as a single chunk.
      expect(chunks).toHaveLength(1);
      const { pageContent } = chunks[0];
      expect(pageContent.length).toBeLessThanOrEqual(limit);
    });

    it('should split large content under a single heading', async () => {
      const limit = 100;
      const input = `
# Large Heading
${'This is some repetitive content. '.repeat(50)}
`;

      const chunks = await splitMarkdownIntoChunks(input, limit);

      // One heading followed by a body far larger than the limit.
      expect(chunks.length).toBeGreaterThan(1);
      expectAllWithinLimit(chunks, limit);
    });

    it('should handle empty markdown input', async () => {
      const limit = 10;
      const input = '';

      const chunks = await splitMarkdownIntoChunks(input, limit);

      // An empty document is expected to yield no chunks at all.
      expect(chunks).toHaveLength(0);
    });

    it('should correctly split nested headings', async () => {
      const limit = 50;
      const input = `
# Heading 1
Content under heading 1.
## Subheading 1.1
Content under subheading 1.1.
# Heading 2
Content under heading 2.
`;

      const chunks = await splitMarkdownIntoChunks(input, limit);

      // Mixed heading levels: more than one chunk, each within the limit.
      expect(chunks.length).toBeGreaterThan(1);
      expectAllWithinLimit(chunks, limit);
    });

    it('should not split if content fits within chunk size', async () => {
      const limit = 100;
      const input = `
# Heading
Short content.
`;

      const chunks = await splitMarkdownIntoChunks(input, limit);

      // The whole fixture is well under the limit, so one chunk is expected.
      expect(chunks).toHaveLength(1);
      const { pageContent } = chunks[0];
      expect(pageContent.length).toBeLessThanOrEqual(limit);
    });

    it('should handle multiple consecutive headers', async () => {
      const limit = 50;
      const input = `
# Heading 1
# Heading 2
# Heading 3
# Heading 4
`;

      const chunks = await splitMarkdownIntoChunks(input, limit);

      // Headings with no body between them; this only asserts that more than
      // one chunk comes back and that each one respects the limit.
      expect(chunks.length).toBeGreaterThan(1);
      expectAllWithinLimit(chunks, limit);
    });
  });