syntax.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. /**
  2. * @typedef {import('micromark-util-types').Extension} Extension
  3. * @typedef {import('micromark-util-types').Resolver} Resolver
  4. * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
  5. * @typedef {import('micromark-util-types').State} State
  6. * @typedef {import('micromark-util-types').Token} Token
  7. */
  8. /**
  9. * @typedef {'left'|'center'|'right'|'none'} Align
  10. */
  11. import { factorySpace } from 'micromark-factory-space';
  12. import {
  13. markdownLineEnding,
  14. markdownLineEndingOrSpace,
  15. markdownSpace,
  16. } from 'micromark-util-character';
  17. import { codes } from 'micromark-util-symbol/codes.js';
  18. import { constants } from 'micromark-util-symbol/constants.js';
  19. import { types } from 'micromark-util-symbol/types.js';
  20. import { ok as assert } from 'uvu/assert';
  /**
   * Micromark syntax extension that adds GFM table parsing.
   *
   * Pass this object in the `extensions` option. It registers a single flow
   * construct (under the `null` key) made of the table tokenizer and the
   * resolver that post-processes the table's events.
   *
   * @type {Extension}
   */
  export const gfmTable = {
    flow: {
      null: {
        tokenize: tokenizeTable,
        resolve: resolveTable,
      },
    },
  };
  // Partial construct handed to `effects.check` after a delimiter or body row
  // to peek at the following line without keeping any of the tokens it opens.
  const nextPrefixedOrBlank = { tokenize: tokenizeNextPrefixedOrBlank, partial: true };
  /**
   * Post-process the flat event list produced by the table tokenizer.
   *
   * `events` is mutated in place (and also returned). One pass performs two
   * rewrites:
   *
   * 1. Inside a `tableRow`, every run of `temporaryTableCellContent` events
   *    that precedes a `tableCellDivider` or the end of the row is replaced by
   *    one `tableContent` token wrapping a single text chunk, so the cell text
   *    is handed on as one piece.
   * 2. On the exit of a `tableCellDivider` (or of the `tableRow`), enter/exit
   *    events for a cell token are inserted around the events collected since
   *    the previous cell boundary. The cell type is `tableDelimiter` inside a
   *    delimiter row, `tableHeader` inside the head, and `tableData` otherwise.
   *
   * @type {Resolver}
   */
  // eslint-disable-next-line complexity
  function resolveTable(events, context) {
    let index = -1;
    /** @type {boolean|undefined} */
    let inHead;
    /** @type {boolean|undefined} */
    let inDelimiterRow;
    /** @type {boolean|undefined} */
    let inRow;
    /** @type {number|undefined} Index of the first pending content event. */
    let contentStart;
    /** @type {number|undefined} Index of the last pending content event. */
    let contentEnd;
    /** @type {number|undefined} Index at which the next cell would open. */
    let cellStart;
    /** @type {boolean|undefined} */
    let seenCellInRow;

    while (++index < events.length) {
      const token = events[index][1];

      if (inRow) {
        if (token.type === 'temporaryTableCellContent') {
          // Indices are numbers, so compare against `undefined` explicitly
          // instead of relying on truthiness.
          contentStart = contentStart ?? index;
          contentEnd = index;
        }

        if (
          // Combine separate content parts into one.
          (token.type === 'tableCellDivider' || token.type === 'tableRow')
          && contentEnd !== undefined
        ) {
          assert(
            contentStart !== undefined,
            'expected `contentStart` to be defined if `contentEnd` is',
          );

          const content = {
            type: 'tableContent',
            start: events[contentStart][1].start,
            end: events[contentEnd][1].end,
          };
          /** @type {Token} */
          const text = {
            type: types.chunkText,
            start: content.start,
            end: content.end,
            // @ts-expect-error It's fine.
            contentType: constants.contentTypeText,
          };

          const removedCount = contentEnd - contentStart + 1;
          events.splice(
            contentStart,
            removedCount,
            ['enter', content, context],
            ['enter', text, context],
            ['exit', text, context],
            ['exit', content, context],
          );

          // Four events replaced `removedCount` events; shift `index` so it
          // keeps pointing at the divider/row event being processed.
          index += 4 - removedCount;
          contentStart = undefined;
          contentEnd = undefined;
        }
      }

      if (
        events[index][0] === 'exit'
        && cellStart !== undefined
        // For the first cell of a row, a divider that sits directly at the row
        // start (a leading `|`) does not close a cell.
        && cellStart + (seenCellInRow ? 0 : 1) < index
        && (token.type === 'tableCellDivider'
          // At the end of a row only close a cell when there is more than a
          // short run starting with whitespace since the last boundary.
          || (token.type === 'tableRow'
            && (cellStart + 3 < index
              || events[cellStart][1].type !== types.whitespace)))
      ) {
        const cell = {
          // eslint-disable-next-line no-nested-ternary
          type: inDelimiterRow
            ? 'tableDelimiter'
            : inHead
              ? 'tableHeader'
              : 'tableData',
          start: events[cellStart][1].start,
          end: events[index][1].end,
        };

        // A divider belongs to the cell it closes, so the exit goes after it;
        // at the end of a row the exit goes right before the row's own exit.
        events.splice(index + (token.type === 'tableCellDivider' ? 1 : 0), 0, [
          'exit',
          cell,
          context,
        ]);
        events.splice(cellStart, 0, ['enter', cell, context]);

        // Two events were inserted at or before the current position.
        index += 2;
        cellStart = index + 1;
        seenCellInRow = true;
      }

      if (token.type === 'tableRow') {
        inRow = events[index][0] === 'enter';
        if (inRow) {
          cellStart = index + 1;
          seenCellInRow = false;
        }
      }

      if (token.type === 'tableDelimiterRow') {
        inDelimiterRow = events[index][0] === 'enter';
        if (inDelimiterRow) {
          cellStart = index + 1;
          seenCellInRow = false;
        }
      }

      if (token.type === 'tableHead') {
        inHead = events[index][0] === 'enter';
      }
    }

    return events;
  }
  /**
   * Tokenizer for a complete GFM table: one header row, one delimiter row and
   * any number of body rows.
   *
   * Besides the closure variables, two values are persisted on
   * `self.containerState` across invocations of this tokenizer:
   *
   * - `rowCount`: how many times `start` ran since the last reset;
   * - `hasDelimiterRow`: whether a delimiter row has been entered.
   *
   * `start` rejects immediately when a delimiter row was already seen or when
   * more than two starts were counted. Both values are cleared whenever the
   * table is closed or abandoned after that guard.
   * NOTE(review): this looks like a bound on the nested table attempts caused
   * by the flow check in `tokenizeRowEnd` -- confirm with the author.
   *
   * @type {Tokenizer}
   */
  function tokenizeTable(effects, ok, nok) {
    // eslint-disable-next-line @typescript-eslint/no-this-alias
    const self = this;
    /** @type {Array<Align>} One entry per delimiter cell seen so far. */
    const align = [];
    /** Partial construct: a line ending followed by a line that continues the table. */
    const rowEnd = { tokenize: tokenizeRowEnd, partial: true };
    let tableHeaderCount = 0;
    /** @type {boolean|undefined} */
    let seenDelimiter;
    /** @type {boolean|undefined} */
    let hasDash;

    return start;

    /**
     * Drop the values persisted on `self.containerState`.
     *
     * @returns {void}
     */
    function clearPersistedState() {
      delete self.containerState.rowCount;
      delete self.containerState.hasDelimiterRow;
    }

    /**
     * Start of the (potential) header row.
     *
     * @type {State}
     */
    function start(code) {
      const { containerState } = self;
      const hasDelimiterRow = containerState.hasDelimiterRow ?? false;

      // increment row count
      const crrRowCount = (containerState.rowCount ?? 0) + 1;
      containerState.rowCount = crrRowCount;

      // Max 2 rows processing before delimiter row.
      // Checked before any token is opened so the reject path does no extra work.
      if (hasDelimiterRow || crrRowCount > 2) {
        return nok(code);
      }

      // @ts-expect-error Custom.
      effects.enter('table')._align = align;
      effects.enter('tableHead');
      effects.enter('tableRow');

      // If we start with a pipe, we open a cell marker.
      if (code === codes.verticalBar) {
        return cellDividerHead(code);
      }

      tableHeaderCount++;
      effects.enter('temporaryTableCellContent');
      // Can't be space or eols at the start of a construct, so we're in a cell.
      assert(!markdownLineEndingOrSpace(code), 'expected non-space');
      return inCellContentHead(code);
    }

    /**
     * At a `|` in the header row.
     *
     * @type {State}
     */
    function cellDividerHead(code) {
      assert(code === codes.verticalBar, 'expected `|`');
      effects.enter('tableCellDivider');
      effects.consume(code);
      effects.exit('tableCellDivider');
      seenDelimiter = true;
      return cellBreakHead;
    }

    /**
     * Between tokens in the header row: decides between row end, whitespace,
     * another divider, or cell content.
     *
     * @type {State}
     */
    function cellBreakHead(code) {
      if (code === codes.eof || markdownLineEnding(code)) {
        return atRowEndHead(code);
      }

      if (markdownSpace(code)) {
        effects.enter(types.whitespace);
        effects.consume(code);
        return inWhitespaceHead;
      }

      // Something other than whitespace/eol follows a divider: count a new
      // header cell.
      if (seenDelimiter) {
        seenDelimiter = undefined;
        tableHeaderCount++;
      }

      if (code === codes.verticalBar) {
        return cellDividerHead(code);
      }

      // Anything else is cell content.
      effects.enter('temporaryTableCellContent');
      return inCellContentHead(code);
    }

    /**
     * In whitespace in the header row.
     *
     * @type {State}
     */
    function inWhitespaceHead(code) {
      if (markdownSpace(code)) {
        effects.consume(code);
        return inWhitespaceHead;
      }

      effects.exit(types.whitespace);
      return cellBreakHead(code);
    }

    /**
     * In cell content in the header row.
     *
     * @type {State}
     */
    function inCellContentHead(code) {
      // EOF, whitespace, pipe
      if (
        code === codes.eof
        || code === codes.verticalBar
        || markdownLineEndingOrSpace(code)
      ) {
        effects.exit('temporaryTableCellContent');
        return cellBreakHead(code);
      }

      effects.consume(code);
      return code === codes.backslash
        ? inCellContentEscapeHead
        : inCellContentHead;
    }

    /**
     * After a backslash in header cell content: an escaped `\` or `|` is
     * consumed as content instead of being treated as a divider.
     *
     * @type {State}
     */
    function inCellContentEscapeHead(code) {
      if (code === codes.backslash || code === codes.verticalBar) {
        effects.consume(code);
        return inCellContentHead;
      }

      // Anything else.
      return inCellContentHead(code);
    }

    /**
     * At the end of the header row. A header without a following line cannot
     * be a table; otherwise try to continue into the delimiter row.
     *
     * @type {State}
     */
    function atRowEndHead(code) {
      if (code === codes.eof) {
        return tableExit(code);
      }

      assert(markdownLineEnding(code), 'expected eol');
      effects.exit('tableRow');
      effects.exit('tableHead');

      // `interrupt` is forced on while the row end is attempted and restored
      // in both outcomes.
      const originalInterrupt = self.interrupt;
      self.interrupt = true;

      return effects.attempt(
        rowEnd,
        (code) => {
          self.interrupt = originalInterrupt;
          effects.enter('tableDelimiterRow');
          return atDelimiterRowBreak(code);
        },
        (code) => {
          self.interrupt = originalInterrupt;
          return tableExit(code);
        },
      )(code);
    }

    /**
     * Between tokens in the delimiter row.
     *
     * @type {State}
     */
    function atDelimiterRowBreak(code) {
      // persist that the table has a delimiter row
      self.containerState.hasDelimiterRow = true;

      if (code === codes.eof || markdownLineEnding(code)) {
        return rowEndDelimiter(code);
      }

      if (markdownSpace(code)) {
        effects.enter(types.whitespace);
        effects.consume(code);
        return inWhitespaceDelimiter;
      }

      if (code === codes.dash) {
        effects.enter('tableDelimiterFiller');
        effects.consume(code);
        hasDash = true;
        align.push('none');
        return inFillerDelimiter;
      }

      if (code === codes.colon) {
        effects.enter('tableDelimiterAlignment');
        effects.consume(code);
        effects.exit('tableDelimiterAlignment');
        align.push('left');
        return afterLeftAlignment;
      }

      // If we start with a pipe, we open a cell marker.
      if (code === codes.verticalBar) {
        effects.enter('tableCellDivider');
        effects.consume(code);
        effects.exit('tableCellDivider');
        return atDelimiterRowBreak;
      }

      return tableExit(code);
    }

    /**
     * In whitespace in the delimiter row.
     *
     * @type {State}
     */
    function inWhitespaceDelimiter(code) {
      if (markdownSpace(code)) {
        effects.consume(code);
        return inWhitespaceDelimiter;
      }

      effects.exit(types.whitespace);
      return atDelimiterRowBreak(code);
    }

    /**
     * In a run of dashes in the delimiter row.
     *
     * @type {State}
     */
    function inFillerDelimiter(code) {
      if (code === codes.dash) {
        effects.consume(code);
        return inFillerDelimiter;
      }

      effects.exit('tableDelimiterFiller');

      if (code === codes.colon) {
        effects.enter('tableDelimiterAlignment');
        effects.consume(code);
        effects.exit('tableDelimiterAlignment');
        // A trailing colon upgrades the current column: `left` -> `center`,
        // anything else -> `right`.
        align[align.length - 1] = align[align.length - 1] === 'left' ? 'center' : 'right';
        return afterRightAlignment;
      }

      return atDelimiterRowBreak(code);
    }

    /**
     * After a leading `:` in the delimiter row; a dash must follow.
     *
     * @type {State}
     */
    function afterLeftAlignment(code) {
      if (code === codes.dash) {
        effects.enter('tableDelimiterFiller');
        effects.consume(code);
        hasDash = true;
        return inFillerDelimiter;
      }

      // Anything else is not ok.
      return tableExit(code);
    }

    /**
     * After a trailing `:` in the delimiter row.
     *
     * @type {State}
     */
    function afterRightAlignment(code) {
      if (code === codes.eof || markdownLineEnding(code)) {
        return rowEndDelimiter(code);
      }

      if (markdownSpace(code)) {
        effects.enter(types.whitespace);
        effects.consume(code);
        return inWhitespaceDelimiter;
      }

      // `|`
      if (code === codes.verticalBar) {
        effects.enter('tableCellDivider');
        effects.consume(code);
        effects.exit('tableCellDivider');
        return atDelimiterRowBreak;
      }

      return tableExit(code);
    }

    /**
     * At the end of the delimiter row.
     *
     * @type {State}
     */
    function rowEndDelimiter(code) {
      effects.exit('tableDelimiterRow');

      // Exit if there was no dash at all, or if the header cell count is not the
      // delimiter cell count.
      if (!hasDash || tableHeaderCount !== align.length) {
        return tableExit(code);
      }

      if (code === codes.eof) {
        return tableClose(code);
      }

      assert(markdownLineEnding(code), 'expected eol');

      // Close the table when the next-line check succeeds; otherwise try to
      // continue with a body row, closing the table if that fails as well.
      return effects.check(
        nextPrefixedOrBlank,
        tableClose,
        effects.attempt(
          rowEnd,
          factorySpace(effects, bodyStart, types.linePrefix, constants.tabSize),
          tableClose,
        ),
      )(code);
    }

    /**
     * Not a table: clear the persisted state and reject.
     *
     * @type {State}
     */
    function tableExit(code) {
      clearPersistedState();
      return nok(code);
    }

    /**
     * End of a valid table: close the token, clear the persisted state and
     * accept.
     *
     * @type {State}
     */
    function tableClose(code) {
      effects.exit('table');
      clearPersistedState();
      return ok(code);
    }

    /**
     * Start of the table body (first body row).
     *
     * @type {State}
     */
    function bodyStart(code) {
      effects.enter('tableBody');
      return rowStartBody(code);
    }

    /**
     * Start of a body row.
     *
     * @type {State}
     */
    function rowStartBody(code) {
      effects.enter('tableRow');

      // If we start with a pipe, we open a cell marker.
      if (code === codes.verticalBar) {
        return cellDividerBody(code);
      }

      effects.enter('temporaryTableCellContent');
      // Can't be space or eols at the start of a construct, so we're in a cell.
      return inCellContentBody(code);
    }

    /**
     * At a `|` in a body row.
     *
     * @type {State}
     */
    function cellDividerBody(code) {
      assert(code === codes.verticalBar, 'expected `|`');
      effects.enter('tableCellDivider');
      effects.consume(code);
      effects.exit('tableCellDivider');
      return cellBreakBody;
    }

    /**
     * Between tokens in a body row.
     *
     * @type {State}
     */
    function cellBreakBody(code) {
      if (code === codes.eof || markdownLineEnding(code)) {
        return atRowEndBody(code);
      }

      if (markdownSpace(code)) {
        effects.enter(types.whitespace);
        effects.consume(code);
        return inWhitespaceBody;
      }

      // `|`
      if (code === codes.verticalBar) {
        return cellDividerBody(code);
      }

      // Anything else is cell content.
      effects.enter('temporaryTableCellContent');
      return inCellContentBody(code);
    }

    /**
     * In whitespace in a body row.
     *
     * @type {State}
     */
    function inWhitespaceBody(code) {
      if (markdownSpace(code)) {
        effects.consume(code);
        return inWhitespaceBody;
      }

      effects.exit(types.whitespace);
      return cellBreakBody(code);
    }

    /**
     * In cell content in a body row.
     *
     * @type {State}
     */
    function inCellContentBody(code) {
      // EOF, whitespace, pipe
      if (
        code === codes.eof
        || code === codes.verticalBar
        || markdownLineEndingOrSpace(code)
      ) {
        effects.exit('temporaryTableCellContent');
        return cellBreakBody(code);
      }

      effects.consume(code);
      return code === codes.backslash
        ? inCellContentEscapeBody
        : inCellContentBody;
    }

    /**
     * After a backslash in body cell content: an escaped `\` or `|` is
     * consumed as content.
     *
     * @type {State}
     */
    function inCellContentEscapeBody(code) {
      if (code === codes.backslash || code === codes.verticalBar) {
        effects.consume(code);
        return inCellContentBody;
      }

      // Anything else.
      return inCellContentBody(code);
    }

    /**
     * At the end of a body row.
     *
     * @type {State}
     */
    function atRowEndBody(code) {
      effects.exit('tableRow');

      if (code === codes.eof) {
        return tableBodyClose(code);
      }

      // Same continuation logic as after the delimiter row, but looping back
      // into another body row.
      return effects.check(
        nextPrefixedOrBlank,
        tableBodyClose,
        effects.attempt(
          rowEnd,
          factorySpace(
            effects,
            rowStartBody,
            types.linePrefix,
            constants.tabSize,
          ),
          tableBodyClose,
        ),
      )(code);
    }

    /**
     * Close the body, then the table.
     *
     * @type {State}
     */
    function tableBodyClose(code) {
      effects.exit('tableBody');
      return tableClose(code);
    }

    /**
     * Partial tokenizer for the line ending after a row. It succeeds only when
     * the following line can continue the table: it must not be lazy, blank,
     * at EOF, indented enough to be indented code, or the start of another
     * flow construct.
     *
     * @type {Tokenizer}
     */
    function tokenizeRowEnd(effects, ok, nok) {
      return start;

      /** @type {State} */
      function start(code) {
        assert(markdownLineEnding(code), 'expected eol');
        effects.enter(types.lineEnding);
        effects.consume(code);
        effects.exit(types.lineEnding);
        return factorySpace(effects, prefixed, types.linePrefix);
      }

      /** @type {State} */
      function prefixed(code) {
        // Blank or interrupting line.
        if (
          self.parser.lazy[self.now().line]
          || code === codes.eof
          || markdownLineEnding(code)
        ) {
          return nok(code);
        }

        const tail = self.events[self.events.length - 1];

        // Indented code can interrupt delimiter and body rows.
        if (
          !self.parser.constructs.disable.null.includes('codeIndented')
          && tail
          && tail[1].type === types.linePrefix
          && tail[2].sliceSerialize(tail[1], true).length >= constants.tabSize
        ) {
          return nok(code);
        }

        // The flag is raised only for the duration of the flow check below and
        // lowered again in both outcomes.
        self._gfmTableDynamicInterruptHack = true;

        return effects.check(
          self.parser.constructs.flow,
          // Another flow construct matches here: the row does not continue.
          (code) => {
            self._gfmTableDynamicInterruptHack = false;
            return nok(code);
          },
          // Nothing else matches: the line belongs to the table.
          (code) => {
            self._gfmTableDynamicInterruptHack = false;
            return ok(code);
          },
        )(code);
      }
    }
  }
  /**
   * Partial tokenizer meant to be used with `effects.check`.
   *
   * It consumes the current code (the line ending) and then looks at what
   * follows. It accepts when `constants.tabSize` spaces (or virtual spaces)
   * follow, or when the line is blank, ends right away, or continues with
   * other markdown whitespace. Any other code rejects.
   *
   * @type {Tokenizer}
   */
  function tokenizeNextPrefixedOrBlank(effects, ok, nok) {
    let indent = 0;

    return atLineEnding;

    /** @type {State} */
    function atLineEnding(code) {
      // This is a check, so we don't care about tokens, but we open a bogus one
      // so we're valid.
      effects.enter('check');
      // EOL.
      effects.consume(code);
      return afterLineEnding;
    }

    /** @type {State} */
    function afterLineEnding(code) {
      const isIndentChar = code === codes.virtualSpace || code === codes.space;

      if (!isIndentChar) {
        // EOF or whitespace is fine; anything else is not.
        const blankOrSpace = code === codes.eof || markdownLineEndingOrSpace(code);
        return blankOrSpace ? ok(code) : nok(code);
      }

      effects.consume(code);
      indent += 1;

      // A full tab-size indent is enough; otherwise keep counting.
      if (indent === constants.tabSize) {
        return ok;
      }

      return afterLineEnding;
    }
  }