syntax.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. /**
  2. * @typedef {import('micromark-util-types').Extension} Extension
  3. * @typedef {import('micromark-util-types').Resolver} Resolver
  4. * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
  5. * @typedef {import('micromark-util-types').State} State
  6. * @typedef {import('micromark-util-types').Token} Token
  7. */
  8. /**
  9. * @typedef {'left'|'center'|'right'|'none'} Align
  10. */
  11. import {ok as assert} from 'uvu/assert'
  12. import {factorySpace} from 'micromark-factory-space'
  13. import {
  14. markdownLineEnding,
  15. markdownLineEndingOrSpace,
  16. markdownSpace
  17. } from 'micromark-util-character'
  18. import {codes} from 'micromark-util-symbol/codes.js'
  19. import {constants} from 'micromark-util-symbol/constants.js'
  20. import {types} from 'micromark-util-symbol/types.js'
  21. /**
  22. * Syntax extension for micromark (passed in `extensions`).
  23. *
  24. * @type {Extension}
  25. */
  26. export const gfmTable = {
  27. flow: {null: {tokenize: tokenizeTable, resolve: resolveTable}}
  28. }
  29. const nextPrefixedOrBlank = {
  30. tokenize: tokenizeNextPrefixedOrBlank,
  31. partial: true
  32. }
  33. /** @type {Resolver} */
  34. // eslint-disable-next-line complexity
  35. function resolveTable(events, context) {
  36. let index = -1
  37. /** @type {boolean|undefined} */
  38. let inHead
  39. /** @type {boolean|undefined} */
  40. let inDelimiterRow
  41. /** @type {boolean|undefined} */
  42. let inRow
  43. /** @type {number|undefined} */
  44. let contentStart
  45. /** @type {number|undefined} */
  46. let contentEnd
  47. /** @type {number|undefined} */
  48. let cellStart
  49. /** @type {boolean|undefined} */
  50. let seenCellInRow
  51. while (++index < events.length) {
  52. const token = events[index][1]
  53. if (inRow) {
  54. if (token.type === 'temporaryTableCellContent') {
  55. contentStart = contentStart || index
  56. contentEnd = index
  57. }
  58. if (
  59. // Combine separate content parts into one.
  60. (token.type === 'tableCellDivider' || token.type === 'tableRow') &&
  61. contentEnd
  62. ) {
  63. assert(
  64. contentStart,
  65. 'expected `contentStart` to be defined if `contentEnd` is'
  66. )
  67. const content = {
  68. type: 'tableContent',
  69. start: events[contentStart][1].start,
  70. end: events[contentEnd][1].end
  71. }
  72. /** @type {Token} */
  73. const text = {
  74. type: types.chunkText,
  75. start: content.start,
  76. end: content.end,
  77. // @ts-expect-error It’s fine.
  78. contentType: constants.contentTypeText
  79. }
  80. assert(
  81. contentStart,
  82. 'expected `contentStart` to be defined if `contentEnd` is'
  83. )
  84. events.splice(
  85. contentStart,
  86. contentEnd - contentStart + 1,
  87. ['enter', content, context],
  88. ['enter', text, context],
  89. ['exit', text, context],
  90. ['exit', content, context]
  91. )
  92. index -= contentEnd - contentStart - 3
  93. contentStart = undefined
  94. contentEnd = undefined
  95. }
  96. }
  97. if (
  98. events[index][0] === 'exit' &&
  99. cellStart !== undefined &&
  100. cellStart + (seenCellInRow ? 0 : 1) < index &&
  101. (token.type === 'tableCellDivider' ||
  102. (token.type === 'tableRow' &&
  103. (cellStart + 3 < index ||
  104. events[cellStart][1].type !== types.whitespace)))
  105. ) {
  106. const cell = {
  107. type: inDelimiterRow
  108. ? 'tableDelimiter'
  109. : inHead
  110. ? 'tableHeader'
  111. : 'tableData',
  112. start: events[cellStart][1].start,
  113. end: events[index][1].end
  114. }
  115. events.splice(index + (token.type === 'tableCellDivider' ? 1 : 0), 0, [
  116. 'exit',
  117. cell,
  118. context
  119. ])
  120. events.splice(cellStart, 0, ['enter', cell, context])
  121. index += 2
  122. cellStart = index + 1
  123. seenCellInRow = true
  124. }
  125. if (token.type === 'tableRow') {
  126. inRow = events[index][0] === 'enter'
  127. if (inRow) {
  128. cellStart = index + 1
  129. seenCellInRow = false
  130. }
  131. }
  132. if (token.type === 'tableDelimiterRow') {
  133. inDelimiterRow = events[index][0] === 'enter'
  134. if (inDelimiterRow) {
  135. cellStart = index + 1
  136. seenCellInRow = false
  137. }
  138. }
  139. if (token.type === 'tableHead') {
  140. inHead = events[index][0] === 'enter'
  141. }
  142. }
  143. return events
  144. }
  145. /** @type {Tokenizer} */
  146. function tokenizeTable(effects, ok, nok) {
  147. const self = this
  148. /** @type {Array<Align>} */
  149. const align = []
  150. let tableHeaderCount = 0
  151. /** @type {boolean|undefined} */
  152. let seenDelimiter
  153. /** @type {boolean|undefined} */
  154. let hasDash
  155. return start
  156. /** @type {State} */
  157. function start(code) {
  158. // @ts-expect-error Custom.
  159. effects.enter('table')._align = align
  160. effects.enter('tableHead')
  161. effects.enter('tableRow')
  162. // If we start with a pipe, we open a cell marker.
  163. if (code === codes.verticalBar) {
  164. return cellDividerHead(code)
  165. }
  166. tableHeaderCount++
  167. effects.enter('temporaryTableCellContent')
  168. // Can’t be space or eols at the start of a construct, so we’re in a cell.
  169. assert(!markdownLineEndingOrSpace(code), 'expected non-space')
  170. return inCellContentHead(code)
  171. }
  172. /** @type {State} */
  173. function cellDividerHead(code) {
  174. assert(code === codes.verticalBar, 'expected `|`')
  175. effects.enter('tableCellDivider')
  176. effects.consume(code)
  177. effects.exit('tableCellDivider')
  178. seenDelimiter = true
  179. return cellBreakHead
  180. }
  181. /** @type {State} */
  182. function cellBreakHead(code) {
  183. if (code === codes.eof || markdownLineEnding(code)) {
  184. return atRowEndHead(code)
  185. }
  186. if (markdownSpace(code)) {
  187. effects.enter(types.whitespace)
  188. effects.consume(code)
  189. return inWhitespaceHead
  190. }
  191. if (seenDelimiter) {
  192. seenDelimiter = undefined
  193. tableHeaderCount++
  194. }
  195. if (code === codes.verticalBar) {
  196. return cellDividerHead(code)
  197. }
  198. // Anything else is cell content.
  199. effects.enter('temporaryTableCellContent')
  200. return inCellContentHead(code)
  201. }
  202. /** @type {State} */
  203. function inWhitespaceHead(code) {
  204. if (markdownSpace(code)) {
  205. effects.consume(code)
  206. return inWhitespaceHead
  207. }
  208. effects.exit(types.whitespace)
  209. return cellBreakHead(code)
  210. }
  211. /** @type {State} */
  212. function inCellContentHead(code) {
  213. // EOF, whitespace, pipe
  214. if (
  215. code === codes.eof ||
  216. code === codes.verticalBar ||
  217. markdownLineEndingOrSpace(code)
  218. ) {
  219. effects.exit('temporaryTableCellContent')
  220. return cellBreakHead(code)
  221. }
  222. effects.consume(code)
  223. return code === codes.backslash
  224. ? inCellContentEscapeHead
  225. : inCellContentHead
  226. }
  227. /** @type {State} */
  228. function inCellContentEscapeHead(code) {
  229. if (code === codes.backslash || code === codes.verticalBar) {
  230. effects.consume(code)
  231. return inCellContentHead
  232. }
  233. // Anything else.
  234. return inCellContentHead(code)
  235. }
  236. /** @type {State} */
  237. function atRowEndHead(code) {
  238. if (code === codes.eof) {
  239. return nok(code)
  240. }
  241. assert(markdownLineEnding(code), 'expected eol')
  242. effects.exit('tableRow')
  243. effects.exit('tableHead')
  244. const originalInterrupt = self.interrupt
  245. self.interrupt = true
  246. return effects.attempt(
  247. {tokenize: tokenizeRowEnd, partial: true},
  248. function (code) {
  249. self.interrupt = originalInterrupt
  250. effects.enter('tableDelimiterRow')
  251. return atDelimiterRowBreak(code)
  252. },
  253. function (code) {
  254. self.interrupt = originalInterrupt
  255. return nok(code)
  256. }
  257. )(code)
  258. }
  259. /** @type {State} */
  260. function atDelimiterRowBreak(code) {
  261. if (code === codes.eof || markdownLineEnding(code)) {
  262. return rowEndDelimiter(code)
  263. }
  264. if (markdownSpace(code)) {
  265. effects.enter(types.whitespace)
  266. effects.consume(code)
  267. return inWhitespaceDelimiter
  268. }
  269. if (code === codes.dash) {
  270. effects.enter('tableDelimiterFiller')
  271. effects.consume(code)
  272. hasDash = true
  273. align.push('none')
  274. return inFillerDelimiter
  275. }
  276. if (code === codes.colon) {
  277. effects.enter('tableDelimiterAlignment')
  278. effects.consume(code)
  279. effects.exit('tableDelimiterAlignment')
  280. align.push('left')
  281. return afterLeftAlignment
  282. }
  283. // If we start with a pipe, we open a cell marker.
  284. if (code === codes.verticalBar) {
  285. effects.enter('tableCellDivider')
  286. effects.consume(code)
  287. effects.exit('tableCellDivider')
  288. return atDelimiterRowBreak
  289. }
  290. return nok(code)
  291. }
  292. /** @type {State} */
  293. function inWhitespaceDelimiter(code) {
  294. if (markdownSpace(code)) {
  295. effects.consume(code)
  296. return inWhitespaceDelimiter
  297. }
  298. effects.exit(types.whitespace)
  299. return atDelimiterRowBreak(code)
  300. }
  301. /** @type {State} */
  302. function inFillerDelimiter(code) {
  303. if (code === codes.dash) {
  304. effects.consume(code)
  305. return inFillerDelimiter
  306. }
  307. effects.exit('tableDelimiterFiller')
  308. if (code === codes.colon) {
  309. effects.enter('tableDelimiterAlignment')
  310. effects.consume(code)
  311. effects.exit('tableDelimiterAlignment')
  312. align[align.length - 1] =
  313. align[align.length - 1] === 'left' ? 'center' : 'right'
  314. return afterRightAlignment
  315. }
  316. return atDelimiterRowBreak(code)
  317. }
  318. /** @type {State} */
  319. function afterLeftAlignment(code) {
  320. if (code === codes.dash) {
  321. effects.enter('tableDelimiterFiller')
  322. effects.consume(code)
  323. hasDash = true
  324. return inFillerDelimiter
  325. }
  326. // Anything else is not ok.
  327. return nok(code)
  328. }
  329. /** @type {State} */
  330. function afterRightAlignment(code) {
  331. if (code === codes.eof || markdownLineEnding(code)) {
  332. return rowEndDelimiter(code)
  333. }
  334. if (markdownSpace(code)) {
  335. effects.enter(types.whitespace)
  336. effects.consume(code)
  337. return inWhitespaceDelimiter
  338. }
  339. // `|`
  340. if (code === codes.verticalBar) {
  341. effects.enter('tableCellDivider')
  342. effects.consume(code)
  343. effects.exit('tableCellDivider')
  344. return atDelimiterRowBreak
  345. }
  346. return nok(code)
  347. }
  348. /** @type {State} */
  349. function rowEndDelimiter(code) {
  350. effects.exit('tableDelimiterRow')
  351. // Exit if there was no dash at all, or if the header cell count is not the
  352. // delimiter cell count.
  353. if (!hasDash || tableHeaderCount !== align.length) {
  354. return nok(code)
  355. }
  356. if (code === codes.eof) {
  357. return tableClose(code)
  358. }
  359. assert(markdownLineEnding(code), 'expected eol')
  360. return effects.check(
  361. nextPrefixedOrBlank,
  362. tableClose,
  363. effects.attempt(
  364. {tokenize: tokenizeRowEnd, partial: true},
  365. factorySpace(effects, bodyStart, types.linePrefix, constants.tabSize),
  366. tableClose
  367. )
  368. )(code)
  369. }
  370. /** @type {State} */
  371. function tableClose(code) {
  372. effects.exit('table')
  373. return ok(code)
  374. }
  375. /** @type {State} */
  376. function bodyStart(code) {
  377. effects.enter('tableBody')
  378. return rowStartBody(code)
  379. }
  380. /** @type {State} */
  381. function rowStartBody(code) {
  382. effects.enter('tableRow')
  383. // If we start with a pipe, we open a cell marker.
  384. if (code === codes.verticalBar) {
  385. return cellDividerBody(code)
  386. }
  387. effects.enter('temporaryTableCellContent')
  388. // Can’t be space or eols at the start of a construct, so we’re in a cell.
  389. return inCellContentBody(code)
  390. }
  391. /** @type {State} */
  392. function cellDividerBody(code) {
  393. assert(code === codes.verticalBar, 'expected `|`')
  394. effects.enter('tableCellDivider')
  395. effects.consume(code)
  396. effects.exit('tableCellDivider')
  397. return cellBreakBody
  398. }
  399. /** @type {State} */
  400. function cellBreakBody(code) {
  401. if (code === codes.eof || markdownLineEnding(code)) {
  402. return atRowEndBody(code)
  403. }
  404. if (markdownSpace(code)) {
  405. effects.enter(types.whitespace)
  406. effects.consume(code)
  407. return inWhitespaceBody
  408. }
  409. // `|`
  410. if (code === codes.verticalBar) {
  411. return cellDividerBody(code)
  412. }
  413. // Anything else is cell content.
  414. effects.enter('temporaryTableCellContent')
  415. return inCellContentBody(code)
  416. }
  417. /** @type {State} */
  418. function inWhitespaceBody(code) {
  419. if (markdownSpace(code)) {
  420. effects.consume(code)
  421. return inWhitespaceBody
  422. }
  423. effects.exit(types.whitespace)
  424. return cellBreakBody(code)
  425. }
  426. /** @type {State} */
  427. function inCellContentBody(code) {
  428. // EOF, whitespace, pipe
  429. if (
  430. code === codes.eof ||
  431. code === codes.verticalBar ||
  432. markdownLineEndingOrSpace(code)
  433. ) {
  434. effects.exit('temporaryTableCellContent')
  435. return cellBreakBody(code)
  436. }
  437. effects.consume(code)
  438. return code === codes.backslash
  439. ? inCellContentEscapeBody
  440. : inCellContentBody
  441. }
  442. /** @type {State} */
  443. function inCellContentEscapeBody(code) {
  444. if (code === codes.backslash || code === codes.verticalBar) {
  445. effects.consume(code)
  446. return inCellContentBody
  447. }
  448. // Anything else.
  449. return inCellContentBody(code)
  450. }
  451. /** @type {State} */
  452. function atRowEndBody(code) {
  453. effects.exit('tableRow')
  454. if (code === codes.eof) {
  455. return tableBodyClose(code)
  456. }
  457. return effects.check(
  458. nextPrefixedOrBlank,
  459. tableBodyClose,
  460. effects.attempt(
  461. {tokenize: tokenizeRowEnd, partial: true},
  462. factorySpace(
  463. effects,
  464. rowStartBody,
  465. types.linePrefix,
  466. constants.tabSize
  467. ),
  468. tableBodyClose
  469. )
  470. )(code)
  471. }
  472. /** @type {State} */
  473. function tableBodyClose(code) {
  474. effects.exit('tableBody')
  475. return tableClose(code)
  476. }
  477. /** @type {Tokenizer} */
  478. function tokenizeRowEnd(effects, ok, nok) {
  479. return start
  480. /** @type {State} */
  481. function start(code) {
  482. assert(markdownLineEnding(code), 'expected eol')
  483. effects.enter(types.lineEnding)
  484. effects.consume(code)
  485. effects.exit(types.lineEnding)
  486. return factorySpace(effects, prefixed, types.linePrefix)
  487. }
  488. /** @type {State} */
  489. function prefixed(code) {
  490. // Blank or interrupting line.
  491. if (
  492. self.parser.lazy[self.now().line] ||
  493. code === codes.eof ||
  494. markdownLineEnding(code)
  495. ) {
  496. return nok(code)
  497. }
  498. const tail = self.events[self.events.length - 1]
  499. // Indented code can interrupt delimiter and body rows.
  500. if (
  501. !self.parser.constructs.disable.null.includes('codeIndented') &&
  502. tail &&
  503. tail[1].type === types.linePrefix &&
  504. tail[2].sliceSerialize(tail[1], true).length >= constants.tabSize
  505. ) {
  506. return nok(code)
  507. }
  508. self._gfmTableDynamicInterruptHack = true
  509. return effects.check(
  510. self.parser.constructs.flow,
  511. function (code) {
  512. self._gfmTableDynamicInterruptHack = false
  513. return nok(code)
  514. },
  515. function (code) {
  516. self._gfmTableDynamicInterruptHack = false
  517. return ok(code)
  518. }
  519. )(code)
  520. }
  521. }
  522. }
  523. /** @type {Tokenizer} */
  524. function tokenizeNextPrefixedOrBlank(effects, ok, nok) {
  525. let size = 0
  526. return start
  527. /** @type {State} */
  528. function start(code) {
  529. // This is a check, so we don’t care about tokens, but we open a bogus one
  530. // so we’re valid.
  531. effects.enter('check')
  532. // EOL.
  533. effects.consume(code)
  534. return whitespace
  535. }
  536. /** @type {State} */
  537. function whitespace(code) {
  538. if (code === codes.virtualSpace || code === codes.space) {
  539. effects.consume(code)
  540. size++
  541. return size === constants.tabSize ? ok : whitespace
  542. }
  543. // EOF or whitespace
  544. if (code === codes.eof || markdownLineEndingOrSpace(code)) {
  545. return ok(code)
  546. }
  547. // Anything else.
  548. return nok(code)
  549. }
  550. }