Mostly file-based Content Presentation System
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

1686 lines
41KB

  1. <?php
  2. #
  3. #
  4. # Parsedown
  5. # http://parsedown.org
  6. #
  7. # (c) Emanuil Rusev
  8. # http://erusev.com
  9. #
  10. # For the full license information, view the LICENSE file that was distributed
  11. # with this source code.
  12. #
  13. #
  14. class Parsedown
  15. {
  16. # ~
  17. const version = '1.7.1';
  18. # ~
  19. function text($text)
  20. {
  21. # make sure no definitions are set
  22. $this->DefinitionData = array();
  23. # standardize line breaks
  24. $text = str_replace(array("\r\n", "\r"), "\n", $text);
  25. # remove surrounding line breaks
  26. $text = trim($text, "\n");
  27. # split text into lines
  28. $lines = explode("\n", $text);
  29. # iterate through lines to identify blocks
  30. $markup = $this->lines($lines);
  31. # trim line breaks
  32. $markup = trim($markup, "\n");
  33. return $markup;
  34. }
  35. #
  36. # Setters
  37. #
  38. function setBreaksEnabled($breaksEnabled)
  39. {
  40. $this->breaksEnabled = $breaksEnabled;
  41. return $this;
  42. }
  43. protected $breaksEnabled;
  44. function setMarkupEscaped($markupEscaped)
  45. {
  46. $this->markupEscaped = $markupEscaped;
  47. return $this;
  48. }
  49. protected $markupEscaped;
  50. function setUrlsLinked($urlsLinked)
  51. {
  52. $this->urlsLinked = $urlsLinked;
  53. return $this;
  54. }
  55. protected $urlsLinked = true;
  56. function setSafeMode($safeMode)
  57. {
  58. $this->safeMode = (bool) $safeMode;
  59. return $this;
  60. }
  61. protected $safeMode;
  62. protected $safeLinksWhitelist = array(
  63. 'http://',
  64. 'https://',
  65. 'ftp://',
  66. 'ftps://',
  67. 'mailto:',
  68. 'data:image/png;base64,',
  69. 'data:image/gif;base64,',
  70. 'data:image/jpeg;base64,',
  71. 'irc:',
  72. 'ircs:',
  73. 'git:',
  74. 'ssh:',
  75. 'news:',
  76. 'steam:',
  77. );
  78. #
  79. # Lines
  80. #
  81. protected $BlockTypes = array(
  82. '#' => array('Header'),
  83. '*' => array('Rule', 'List'),
  84. '+' => array('List'),
  85. '-' => array('SetextHeader', 'Table', 'Rule', 'List'),
  86. '0' => array('List'),
  87. '1' => array('List'),
  88. '2' => array('List'),
  89. '3' => array('List'),
  90. '4' => array('List'),
  91. '5' => array('List'),
  92. '6' => array('List'),
  93. '7' => array('List'),
  94. '8' => array('List'),
  95. '9' => array('List'),
  96. ':' => array('Table'),
  97. '<' => array('Comment', 'Markup'),
  98. '=' => array('SetextHeader'),
  99. '>' => array('Quote'),
  100. '[' => array('Reference'),
  101. '_' => array('Rule'),
  102. '`' => array('FencedCode'),
  103. '|' => array('Table'),
  104. '~' => array('FencedCode'),
  105. );
  106. # ~
  107. protected $unmarkedBlockTypes = array(
  108. 'Code',
  109. );
  110. #
  111. # Blocks
  112. #
    # Groups raw lines into blocks and renders the blocks to markup.
    #
    # $lines: array of strings, one per input line (no line terminators).
    # Returns the markup of all visible blocks; every block is preceded by a
    # line break and the result ends with one.
    protected function lines(array $lines)
    {
        $CurrentBlock = null;

        foreach ($lines as $line)
        {
            # a blank line starts nothing; it only marks the open block as interrupted
            if (chop($line) === '')
            {
                if (isset($CurrentBlock))
                {
                    $CurrentBlock['interrupted'] = true;
                }

                continue;
            }

            # expand each tab with spaces up to the next multiple of four characters
            if (strpos($line, "\t") !== false)
            {
                $parts = explode("\t", $line);

                $line = $parts[0];

                unset($parts[0]);

                foreach ($parts as $part)
                {
                    $shortage = 4 - mb_strlen($line, 'utf-8') % 4;

                    $line .= str_repeat(' ', $shortage);
                    $line .= $part;
                }
            }

            # count the leading spaces
            $indent = 0;

            while (isset($line[$indent]) and $line[$indent] === ' ')
            {
                $indent ++;
            }

            $text = $indent > 0 ? substr($line, $indent) : $line;

            # 'body' keeps the indentation, 'text' is the line without it

            $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);

            # give the open block the first chance to absorb the line

            if (isset($CurrentBlock['continuable']))
            {
                $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $CurrentBlock = $Block;

                    continue;
                }
                else
                {
                    # the block refused the line: let it finalise itself if it can
                    if ($this->isBlockCompletable($CurrentBlock['type']))
                    {
                        $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
                    }
                }
            }

            # the first non-space character selects the candidate block types

            $marker = $text[0];

            # unmarked types are always tried first, then the marker-specific ones

            $blockTypes = $this->unmarkedBlockTypes;

            if (isset($this->BlockTypes[$marker]))
            {
                foreach ($this->BlockTypes[$marker] as $blockType)
                {
                    $blockTypes []= $blockType;
                }
            }

            #
            # the first handler that returns a block wins

            foreach ($blockTypes as $blockType)
            {
                $Block = $this->{'block'.$blockType}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $Block['type'] = $blockType;

                    # a handler that returns an already 'identified' block is
                    # replacing the current block, so the current one is not stored
                    if ( ! isset($Block['identified']))
                    {
                        $Blocks []= $CurrentBlock;

                        $Block['identified'] = true;
                    }

                    if ($this->isBlockContinuable($blockType))
                    {
                        $Block['continuable'] = true;
                    }

                    $CurrentBlock = $Block;

                    continue 2;
                }
            }

            # no handler matched: extend an open, uninterrupted paragraph
            # (a block without 'type') or start a new paragraph

            if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
            {
                $CurrentBlock['element']['text'] .= "\n".$text;
            }
            else
            {
                $Blocks []= $CurrentBlock;

                $CurrentBlock = $this->paragraph($Line);

                $CurrentBlock['identified'] = true;
            }
        }

        # let the last block finalise itself

        if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
        {
            $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
        }

        # store the last block; index 0 is dropped unconditionally
        # NOTE(review): with the handlers visible in this file the first value
        # pushed is the initial null $CurrentBlock - confirm for custom handlers

        $Blocks []= $CurrentBlock;

        unset($Blocks[0]);

        # render every block that is not hidden

        $markup = '';

        foreach ($Blocks as $Block)
        {
            if (isset($Block['hidden']))
            {
                continue;
            }

            $markup .= "\n";
            $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
        }

        $markup .= "\n";

        # ~

        return $markup;
    }
  230. protected function isBlockContinuable($Type)
  231. {
  232. return method_exists($this, 'block'.$Type.'Continue');
  233. }
  234. protected function isBlockCompletable($Type)
  235. {
  236. return method_exists($this, 'block'.$Type.'Complete');
  237. }
  238. #
  239. # Code
  240. protected function blockCode($Line, $Block = null)
  241. {
  242. if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
  243. {
  244. return;
  245. }
  246. if ($Line['indent'] >= 4)
  247. {
  248. $text = substr($Line['body'], 4);
  249. $Block = array(
  250. 'element' => array(
  251. 'name' => 'pre',
  252. 'handler' => 'element',
  253. 'text' => array(
  254. 'name' => 'code',
  255. 'text' => $text,
  256. ),
  257. ),
  258. );
  259. return $Block;
  260. }
  261. }
  262. protected function blockCodeContinue($Line, $Block)
  263. {
  264. if ($Line['indent'] >= 4)
  265. {
  266. if (isset($Block['interrupted']))
  267. {
  268. $Block['element']['text']['text'] .= "\n";
  269. unset($Block['interrupted']);
  270. }
  271. $Block['element']['text']['text'] .= "\n";
  272. $text = substr($Line['body'], 4);
  273. $Block['element']['text']['text'] .= $text;
  274. return $Block;
  275. }
  276. }
  277. protected function blockCodeComplete($Block)
  278. {
  279. $text = $Block['element']['text']['text'];
  280. $Block['element']['text']['text'] = $text;
  281. return $Block;
  282. }
  283. #
  284. # Comment
  285. protected function blockComment($Line)
  286. {
  287. if ($this->markupEscaped or $this->safeMode)
  288. {
  289. return;
  290. }
  291. if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
  292. {
  293. $Block = array(
  294. 'markup' => $Line['body'],
  295. );
  296. if (preg_match('/-->$/', $Line['text']))
  297. {
  298. $Block['closed'] = true;
  299. }
  300. return $Block;
  301. }
  302. }
  303. protected function blockCommentContinue($Line, array $Block)
  304. {
  305. if (isset($Block['closed']))
  306. {
  307. return;
  308. }
  309. $Block['markup'] .= "\n" . $Line['body'];
  310. if (preg_match('/-->$/', $Line['text']))
  311. {
  312. $Block['closed'] = true;
  313. }
  314. return $Block;
  315. }
  316. #
  317. # Fenced Code
  318. protected function blockFencedCode($Line)
  319. {
  320. if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
  321. {
  322. $Element = array(
  323. 'name' => 'code',
  324. 'text' => '',
  325. );
  326. if (isset($matches[1]))
  327. {
  328. $class = 'language-'.$matches[1];
  329. $Element['attributes'] = array(
  330. 'class' => $class,
  331. );
  332. }
  333. $Block = array(
  334. 'char' => $Line['text'][0],
  335. 'element' => array(
  336. 'name' => 'pre',
  337. 'handler' => 'element',
  338. 'text' => $Element,
  339. ),
  340. );
  341. return $Block;
  342. }
  343. }
  344. protected function blockFencedCodeContinue($Line, $Block)
  345. {
  346. if (isset($Block['complete']))
  347. {
  348. return;
  349. }
  350. if (isset($Block['interrupted']))
  351. {
  352. $Block['element']['text']['text'] .= "\n";
  353. unset($Block['interrupted']);
  354. }
  355. if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
  356. {
  357. $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);
  358. $Block['complete'] = true;
  359. return $Block;
  360. }
  361. $Block['element']['text']['text'] .= "\n".$Line['body'];
  362. return $Block;
  363. }
  364. protected function blockFencedCodeComplete($Block)
  365. {
  366. $text = $Block['element']['text']['text'];
  367. $Block['element']['text']['text'] = $text;
  368. return $Block;
  369. }
  370. #
  371. # Header
  372. protected function blockHeader($Line)
  373. {
  374. if (isset($Line['text'][1]))
  375. {
  376. $level = 1;
  377. while (isset($Line['text'][$level]) and $Line['text'][$level] === '#')
  378. {
  379. $level ++;
  380. }
  381. if ($level > 6)
  382. {
  383. return;
  384. }
  385. $text = trim($Line['text'], '# ');
  386. $Block = array(
  387. 'element' => array(
  388. 'name' => 'h' . min(6, $level),
  389. 'text' => $text,
  390. 'handler' => 'line',
  391. ),
  392. );
  393. return $Block;
  394. }
  395. }
  396. #
  397. # List
    # Starts an ordered or unordered list.
    #
    # $Line: line descriptor with 'body', 'indent' and 'text'.
    # Returns a list block holding its first item, or null when the line is
    # not a list item (marker followed by at least one space).
    protected function blockList($Line)
    {
        # a first character up to '-' selects the bullet pattern, anything
        # above it the numbered pattern
        list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');

        if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
        {
            # 'indent' and 'pattern' are compared against later lines by blockListContinue()
            $Block = array(
                'indent' => $Line['indent'],
                'pattern' => $pattern,
                'element' => array(
                    'name' => $name,
                    'handler' => 'elements',
                ),
            );

            if($name === 'ol')
            {
                # everything before the first '.' is the item number
                $listStart = stristr($matches[0], '.', true);

                # a 'start' attribute is only emitted when the number is not exactly '1'
                if($listStart !== '1')
                {
                    $Block['element']['attributes'] = array('start' => $listStart);
                }
            }

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $matches[2],
                ),
            );

            # stored by reference: lines appended to $Block['li'] later also
            # show up in the element list
            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }
    }
    # Feeds a line into an open list.
    #
    # Returns the updated block, or null when the line does not belong to the
    # list (unindented text after a blank line).
    protected function blockListContinue($Line, array $Block)
    {
        # same indentation and same marker pattern: a new item of this list
        if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
        {
            if (isset($Block['interrupted']))
            {
                # a blank line separated the items: end the previous item with
                # an empty line and mark the list as loose
                $Block['li']['text'] []= '';

                $Block['loose'] = true;

                unset($Block['interrupted']);
            }

            # break the reference to the previous item before creating the next one
            unset($Block['li']);

            $text = isset($matches[1]) ? $matches[1] : '';

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $text,
                ),
            );

            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }

        # a reference definition is registered by blockReference() and adds
        # nothing to the item
        if ($Line['text'][0] === '[' and $this->blockReference($Line))
        {
            return $Block;
        }

        # no blank line in between: the line continues the current item,
        # minus up to four leading spaces
        if ( ! isset($Block['interrupted']))
        {
            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            return $Block;
        }

        # after a blank line only indented text still belongs to the item
        if ($Line['indent'] > 0)
        {
            $Block['li']['text'] []= '';

            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            unset($Block['interrupted']);

            return $Block;
        }
    }
    # Finalises a list. In a list marked 'loose', every item that does not
    # already end with an empty line gets one appended.
    # Returns the (possibly modified) block.
    protected function blockListComplete(array $Block)
    {
        if (isset($Block['loose']))
        {
            # iterate by reference so the items are changed in place
            foreach ($Block['element']['text'] as &$li)
            {
                if (end($li['text']) !== '')
                {
                    $li['text'] []= '';
                }
            }
        }

        return $Block;
    }
  485. #
  486. # Quote
  487. protected function blockQuote($Line)
  488. {
  489. if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
  490. {
  491. $Block = array(
  492. 'element' => array(
  493. 'name' => 'blockquote',
  494. 'handler' => 'lines',
  495. 'text' => (array) $matches[1],
  496. ),
  497. );
  498. return $Block;
  499. }
  500. }
  501. protected function blockQuoteContinue($Line, array $Block)
  502. {
  503. if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
  504. {
  505. if (isset($Block['interrupted']))
  506. {
  507. $Block['element']['text'] []= '';
  508. unset($Block['interrupted']);
  509. }
  510. $Block['element']['text'] []= $matches[1];
  511. return $Block;
  512. }
  513. if ( ! isset($Block['interrupted']))
  514. {
  515. $Block['element']['text'] []= $Line['text'];
  516. return $Block;
  517. }
  518. }
  519. #
  520. # Rule
  521. protected function blockRule($Line)
  522. {
  523. if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
  524. {
  525. $Block = array(
  526. 'element' => array(
  527. 'name' => 'hr'
  528. ),
  529. );
  530. return $Block;
  531. }
  532. }
  533. #
  534. # Setext
  535. protected function blockSetextHeader($Line, array $Block = null)
  536. {
  537. if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
  538. {
  539. return;
  540. }
  541. if (chop($Line['text'], $Line['text'][0]) === '')
  542. {
  543. $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
  544. return $Block;
  545. }
  546. }
  547. #
  548. # Markup
  549. protected function blockMarkup($Line)
  550. {
  551. if ($this->markupEscaped or $this->safeMode)
  552. {
  553. return;
  554. }
  555. if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
  556. {
  557. $element = strtolower($matches[1]);
  558. if (in_array($element, $this->textLevelElements))
  559. {
  560. return;
  561. }
  562. $Block = array(
  563. 'name' => $matches[1],
  564. 'depth' => 0,
  565. 'markup' => $Line['text'],
  566. );
  567. $length = strlen($matches[0]);
  568. $remainder = substr($Line['text'], $length);
  569. if (trim($remainder) === '')
  570. {
  571. if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
  572. {
  573. $Block['closed'] = true;
  574. $Block['void'] = true;
  575. }
  576. }
  577. else
  578. {
  579. if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
  580. {
  581. return;
  582. }
  583. if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
  584. {
  585. $Block['closed'] = true;
  586. }
  587. }
  588. return $Block;
  589. }
  590. }
  591. protected function blockMarkupContinue($Line, array $Block)
  592. {
  593. if (isset($Block['closed']))
  594. {
  595. return;
  596. }
  597. if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
  598. {
  599. $Block['depth'] ++;
  600. }
  601. if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
  602. {
  603. if ($Block['depth'] > 0)
  604. {
  605. $Block['depth'] --;
  606. }
  607. else
  608. {
  609. $Block['closed'] = true;
  610. }
  611. }
  612. if (isset($Block['interrupted']))
  613. {
  614. $Block['markup'] .= "\n";
  615. unset($Block['interrupted']);
  616. }
  617. $Block['markup'] .= "\n".$Line['body'];
  618. return $Block;
  619. }
  620. #
  621. # Reference
  622. protected function blockReference($Line)
  623. {
  624. if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
  625. {
  626. $id = strtolower($matches[1]);
  627. $Data = array(
  628. 'url' => $matches[2],
  629. 'title' => null,
  630. );
  631. if (isset($matches[3]))
  632. {
  633. $Data['title'] = $matches[3];
  634. }
  635. $this->DefinitionData['Reference'][$id] = $Data;
  636. $Block = array(
  637. 'hidden' => true,
  638. );
  639. return $Block;
  640. }
  641. }
  642. #
  643. # Table
  644. protected function blockTable($Line, array $Block = null)
  645. {
  646. if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
  647. {
  648. return;
  649. }
  650. if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '')
  651. {
  652. $alignments = array();
  653. $divider = $Line['text'];
  654. $divider = trim($divider);
  655. $divider = trim($divider, '|');
  656. $dividerCells = explode('|', $divider);
  657. foreach ($dividerCells as $dividerCell)
  658. {
  659. $dividerCell = trim($dividerCell);
  660. if ($dividerCell === '')
  661. {
  662. continue;
  663. }
  664. $alignment = null;
  665. if ($dividerCell[0] === ':')
  666. {
  667. $alignment = 'left';
  668. }
  669. if (substr($dividerCell, - 1) === ':')
  670. {
  671. $alignment = $alignment === 'left' ? 'center' : 'right';
  672. }
  673. $alignments []= $alignment;
  674. }
  675. # ~
  676. $HeaderElements = array();
  677. $header = $Block['element']['text'];
  678. $header = trim($header);
  679. $header = trim($header, '|');
  680. $headerCells = explode('|', $header);
  681. foreach ($headerCells as $index => $headerCell)
  682. {
  683. $headerCell = trim($headerCell);
  684. $HeaderElement = array(
  685. 'name' => 'th',
  686. 'text' => $headerCell,
  687. 'handler' => 'line',
  688. );
  689. if (isset($alignments[$index]))
  690. {
  691. $alignment = $alignments[$index];
  692. $HeaderElement['attributes'] = array(
  693. 'style' => 'text-align: '.$alignment.';',
  694. );
  695. }
  696. $HeaderElements []= $HeaderElement;
  697. }
  698. # ~
  699. $Block = array(
  700. 'alignments' => $alignments,
  701. 'identified' => true,
  702. 'element' => array(
  703. 'name' => 'table',
  704. 'handler' => 'elements',
  705. ),
  706. );
  707. $Block['element']['text'] []= array(
  708. 'name' => 'thead',
  709. 'handler' => 'elements',
  710. );
  711. $Block['element']['text'] []= array(
  712. 'name' => 'tbody',
  713. 'handler' => 'elements',
  714. 'text' => array(),
  715. );
  716. $Block['element']['text'][0]['text'] []= array(
  717. 'name' => 'tr',
  718. 'handler' => 'elements',
  719. 'text' => $HeaderElements,
  720. );
  721. return $Block;
  722. }
  723. }
  724. protected function blockTableContinue($Line, array $Block)
  725. {
  726. if (isset($Block['interrupted']))
  727. {
  728. return;
  729. }
  730. if ($Line['text'][0] === '|' or strpos($Line['text'], '|'))
  731. {
  732. $Elements = array();
  733. $row = $Line['text'];
  734. $row = trim($row);
  735. $row = trim($row, '|');
  736. preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);
  737. foreach ($matches[0] as $index => $cell)
  738. {
  739. $cell = trim($cell);
  740. $Element = array(
  741. 'name' => 'td',
  742. 'handler' => 'line',
  743. 'text' => $cell,
  744. );
  745. if (isset($Block['alignments'][$index]))
  746. {
  747. $Element['attributes'] = array(
  748. 'style' => 'text-align: '.$Block['alignments'][$index].';',
  749. );
  750. }
  751. $Elements []= $Element;
  752. }
  753. $Element = array(
  754. 'name' => 'tr',
  755. 'handler' => 'elements',
  756. 'text' => $Elements,
  757. );
  758. $Block['element']['text'][1]['text'] []= $Element;
  759. return $Block;
  760. }
  761. }
  762. #
  763. # ~
  764. #
  765. protected function paragraph($Line)
  766. {
  767. $Block = array(
  768. 'element' => array(
  769. 'name' => 'p',
  770. 'text' => $Line['text'],
  771. 'handler' => 'line',
  772. ),
  773. );
  774. return $Block;
  775. }
  776. #
  777. # Inline Elements
  778. #
  779. protected $InlineTypes = array(
  780. '"' => array('SpecialCharacter'),
  781. '!' => array('Image'),
  782. '&' => array('SpecialCharacter'),
  783. '*' => array('Emphasis'),
  784. ':' => array('Url'),
  785. '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
  786. '>' => array('SpecialCharacter'),
  787. '[' => array('Link'),
  788. '_' => array('Emphasis'),
  789. '`' => array('Code'),
  790. '~' => array('Strikethrough'),
  791. '\\' => array('EscapeSequence'),
  792. );
  793. # ~
  794. protected $inlineMarkerList = '!"*_&[:<>`~\\';
  795. #
  796. # ~
  797. #
    # Renders the inline elements of a piece of text.
    #
    # $text:         the text to render.
    # $nonNestables: inline type names that must not be parsed in this context.
    # Returns the resulting markup.
    public function line($text, $nonNestables=array())
    {
        $markup = '';

        # $excerpt is based on the first occurrence of a marker

        while ($excerpt = strpbrk($text, $this->inlineMarkerList))
        {
            $marker = $excerpt[0];

            $markerPosition = strpos($text, $marker);

            # 'text' starts at the marker, 'context' is the whole remaining text
            $Excerpt = array('text' => $excerpt, 'context' => $text);

            foreach ($this->InlineTypes[$marker] as $inlineType)
            {
                # check to see if the current inline type is nestable in the current context

                if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables))
                {
                    continue;
                }

                $Inline = $this->{'inline'.$inlineType}($Excerpt);

                if ( ! isset($Inline))
                {
                    continue;
                }

                # makes sure that the inline belongs to "our" marker

                if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
                {
                    continue;
                }

                # sets a default inline position

                if ( ! isset($Inline['position']))
                {
                    $Inline['position'] = $markerPosition;
                }

                # cause the new element to 'inherit' our non nestables

                foreach ($nonNestables as $non_nestable)
                {
                    $Inline['element']['nonNestables'][] = $non_nestable;
                }

                # the text that comes before the inline
                $unmarkedText = substr($text, 0, $Inline['position']);

                # compile the unmarked text
                $markup .= $this->unmarkedText($unmarkedText);

                # compile the inline
                $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);

                # remove the examined text
                $text = substr($text, $Inline['position'] + $Inline['extent']);

                # restart the scan on the remaining text
                continue 2;
            }

            # the marker does not belong to an inline

            $unmarkedText = substr($text, 0, $markerPosition + 1);

            $markup .= $this->unmarkedText($unmarkedText);

            $text = substr($text, $markerPosition + 1);
        }

        # whatever is left contains no marker at all
        $markup .= $this->unmarkedText($text);

        return $markup;
    }
  852. #
  853. # ~
  854. #
  855. protected function inlineCode($Excerpt)
  856. {
  857. $marker = $Excerpt['text'][0];
  858. if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
  859. {
  860. $text = $matches[2];
  861. $text = preg_replace("/[ ]*\n/", ' ', $text);
  862. return array(
  863. 'extent' => strlen($matches[0]),
  864. 'element' => array(
  865. 'name' => 'code',
  866. 'text' => $text,
  867. ),
  868. );
  869. }
  870. }
  871. protected function inlineEmailTag($Excerpt)
  872. {
  873. $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';
  874. $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@'
  875. . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*';
  876. if (strpos($Excerpt['text'], '>') !== false
  877. and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches)
  878. ){
  879. $url = $matches[1];
  880. if ( ! isset($matches[2]))
  881. {
  882. $url = 'mailto:' . $url;
  883. }
  884. return array(
  885. 'extent' => strlen($matches[0]),
  886. 'element' => array(
  887. 'name' => 'a',
  888. 'text' => $matches[1],
  889. 'attributes' => array(
  890. 'href' => $url,
  891. ),
  892. ),
  893. );
  894. }
  895. }
  896. protected function inlineEmphasis($Excerpt)
  897. {
  898. if ( ! isset($Excerpt['text'][1]))
  899. {
  900. return;
  901. }
  902. $marker = $Excerpt['text'][0];
  903. if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
  904. {
  905. $emphasis = 'strong';
  906. }
  907. elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
  908. {
  909. $emphasis = 'em';
  910. }
  911. else
  912. {
  913. return;
  914. }
  915. return array(
  916. 'extent' => strlen($matches[0]),
  917. 'element' => array(
  918. 'name' => $emphasis,
  919. 'handler' => 'line',
  920. 'text' => $matches[1],
  921. ),
  922. );
  923. }
  924. protected function inlineEscapeSequence($Excerpt)
  925. {
  926. if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
  927. {
  928. return array(
  929. 'markup' => $Excerpt['text'][1],
  930. 'extent' => 2,
  931. );
  932. }
  933. }
  934. protected function inlineImage($Excerpt)
  935. {
  936. if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[')
  937. {
  938. return;
  939. }
  940. $Excerpt['text']= substr($Excerpt['text'], 1);
  941. $Link = $this->inlineLink($Excerpt);
  942. if ($Link === null)
  943. {
  944. return;
  945. }
  946. $Inline = array(
  947. 'extent' => $Link['extent'] + 1,
  948. 'element' => array(
  949. 'name' => 'img',
  950. 'attributes' => array(
  951. 'src' => $Link['element']['attributes']['href'],
  952. 'alt' => $Link['element']['text'],
  953. ),
  954. ),
  955. );
  956. $Inline['element']['attributes'] += $Link['element']['attributes'];
  957. unset($Inline['element']['attributes']['href']);
  958. return $Inline;
  959. }
  960. protected function inlineLink($Excerpt)
  961. {
  962. $Element = array(
  963. 'name' => 'a',
  964. 'handler' => 'line',
  965. 'nonNestables' => array('Url', 'Link'),
  966. 'text' => null,
  967. 'attributes' => array(
  968. 'href' => null,
  969. 'title' => null,
  970. ),
  971. );
  972. $extent = 0;
  973. $remainder = $Excerpt['text'];
  974. if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
  975. {
  976. $Element['text'] = $matches[1];
  977. $extent += strlen($matches[0]);
  978. $remainder = substr($remainder, $extent);
  979. }
  980. else
  981. {
  982. return;
  983. }
  984. if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
  985. {
  986. $Element['attributes']['href'] = $matches[1];
  987. if (isset($matches[2]))
  988. {
  989. $Element['attributes']['title'] = substr($matches[2], 1, - 1);
  990. }
  991. $extent += strlen($matches[0]);
  992. }
  993. else
  994. {
  995. if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
  996. {
  997. $definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
  998. $definition = strtolower($definition);
  999. $extent += strlen($matches[0]);
  1000. }
  1001. else
  1002. {
  1003. $definition = strtolower($Element['text']);
  1004. }
  1005. if ( ! isset($this->DefinitionData['Reference'][$definition]))
  1006. {
  1007. return;
  1008. }
  1009. $Definition = $this->DefinitionData['Reference'][$definition];
  1010. $Element['attributes']['href'] = $Definition['url'];
  1011. $Element['attributes']['title'] = $Definition['title'];
  1012. }
  1013. return array(
  1014. 'extent' => $extent,
  1015. 'element' => $Element,
  1016. );
  1017. }
  1018. protected function inlineMarkup($Excerpt)
  1019. {
  1020. if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
  1021. {
  1022. return;
  1023. }
  1024. if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches))
  1025. {
  1026. return array(
  1027. 'markup' => $matches[0],
  1028. 'extent' => strlen($matches[0]),
  1029. );
  1030. }
  1031. if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?[^-])*-->/s', $Excerpt['text'], $matches))
  1032. {
  1033. return array(
  1034. 'markup' => $matches[0],
  1035. 'extent' => strlen($matches[0]),
  1036. );
  1037. }
  1038. if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
  1039. {
  1040. return array(
  1041. 'markup' => $matches[0],
  1042. 'extent' => strlen($matches[0]),
  1043. );
  1044. }
  1045. }
  1046. protected function inlineSpecialCharacter($Excerpt)
  1047. {
  1048. if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text']))
  1049. {
  1050. return array(
  1051. 'markup' => '&amp;',
  1052. 'extent' => 1,
  1053. );
  1054. }
  1055. $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot');
  1056. if (isset($SpecialCharacter[$Excerpt['text'][0]]))
  1057. {
  1058. return array(
  1059. 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';',
  1060. 'extent' => 1,
  1061. );
  1062. }
  1063. }
  1064. protected function inlineStrikethrough($Excerpt)
  1065. {
  1066. if ( ! isset($Excerpt['text'][1]))
  1067. {
  1068. return;
  1069. }
  1070. if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
  1071. {
  1072. return array(
  1073. 'extent' => strlen($matches[0]),
  1074. 'element' => array(
  1075. 'name' => 'del',
  1076. 'text' => $matches[1],
  1077. 'handler' => 'line',
  1078. ),
  1079. );
  1080. }
  1081. }
  1082. protected function inlineUrl($Excerpt)
  1083. {
  1084. if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
  1085. {
  1086. return;
  1087. }
  1088. if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
  1089. {
  1090. $url = $matches[0][0];
  1091. $Inline = array(
  1092. 'extent' => strlen($matches[0][0]),
  1093. 'position' => $matches[0][1],
  1094. 'element' => array(
  1095. 'name' => 'a',
  1096. 'text' => $url,
  1097. 'attributes' => array(
  1098. 'href' => $url,
  1099. ),
  1100. ),
  1101. );
  1102. return $Inline;
  1103. }
  1104. }
  1105. protected function inlineUrlTag($Excerpt)
  1106. {
  1107. if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
  1108. {
  1109. $url = $matches[1];
  1110. return array(
  1111. 'extent' => strlen($matches[0]),
  1112. 'element' => array(
  1113. 'name' => 'a',
  1114. 'text' => $url,
  1115. 'attributes' => array(
  1116. 'href' => $url,
  1117. ),
  1118. ),
  1119. );
  1120. }
  1121. }
  1122. # ~
  1123. protected function unmarkedText($text)
  1124. {
  1125. if ($this->breaksEnabled)
  1126. {
  1127. $text = preg_replace('/[ ]*\n/', "<br />\n", $text);
  1128. }
  1129. else
  1130. {
  1131. $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);
  1132. $text = str_replace(" \n", "\n", $text);
  1133. }
  1134. return $text;
  1135. }
  1136. #
  1137. # Handlers
  1138. #
  #
  # Renders a single element array as an HTML string.
  #
  # Keys read from $Element:
  #   'name'         - tag name (required)
  #   'attributes'   - optional map of attribute name => value; entries whose
  #                    value is null are left out, other values are escaped
  #   'text'         - optional content; when absent the tag is emitted in
  #                    self-closing form
  #   'handler'      - optional name of a method on this object that renders
  #                    'text'; without it the text is escaped (quotes kept)
  #   'nonNestables' - optional second argument for the handler, defaults to
  #                    an empty array
  #
  # In safe mode the element is passed through sanitiseElement() first.
  #
  protected function element(array $Element)
  {
      if ($this->safeMode)
      {
          $Element = $this->sanitiseElement($Element);
      }

      $tag = $Element['name'];

      $html = '<'.$tag;

      $attributes = isset($Element['attributes']) ? $Element['attributes'] : array();

      foreach ($attributes as $name => $value)
      {
          if ($value !== null)
          {
              $html .= ' '.$name.'="'.self::escape($value).'"';
          }
      }

      # no content: emit the self-closing form and stop
      if ( ! isset($Element['text']))
      {
          return $html.' />';
      }

      if (isset($Element['handler']))
      {
          $nonNestables = isset($Element['nonNestables']) ? $Element['nonNestables'] : array();

          $content = $this->{$Element['handler']}($Element['text'], $nonNestables);
      }
      else
      {
          $content = self::escape($Element['text'], true);
      }

      return $html.'>'.$content.'</'.$tag.'>';
  }
  #
  # Renders a list of element arrays with element(), placing a newline in
  # front of each rendered element and one more newline at the very end.
  # An empty list therefore yields a single newline.
  #
  protected function elements(array $Elements)
  {
      $pieces = array();

      foreach ($Elements as $Element)
      {
          $pieces[] = "\n".$this->element($Element);
      }

      return implode('', $pieces)."\n";
  }
  1190. # ~
  #
  # Renders the lines of a list item with lines().
  #
  # When none of the lines is empty and the trimmed markup opens with <p>,
  # that opening <p> and the first </p> after it are stripped, and the
  # trimmed result is returned. In every other case the markup produced by
  # lines() is returned unchanged.
  #
  protected function li($lines)
  {
      $markup = $this->lines($lines);

      # an empty line among the item's lines: keep the paragraph markup
      if (in_array('', $lines))
      {
          return $markup;
      }

      $trimmed = trim($markup);

      if (substr($trimmed, 0, 3) !== '<p>')
      {
          return $markup;
      }

      $inner = substr($trimmed, 3);

      $closingAt = strpos($inner, "</p>");

      return substr_replace($inner, '', $closingAt, 4);
  }
  1204. #
  1205. # Deprecated Methods
  1206. #
  #
  # Deprecated alias kept for callers of the old API: forwards $text to
  # text() and returns its result unchanged.
  #
  function parse($text)
  {
      return $this->text($text);
  }
  #
  # Returns a copy of $Element made safe for output:
  #
  #  - for 'a' and 'img' elements the URL attribute ('href' / 'src') is run
  #    through filterUnsafeUrlInAttribute()
  #  - attributes whose name does not match the allowed-name pattern are
  #    removed
  #  - attributes whose name starts with "on" (any letter case) are removed
  #
  protected function sanitiseElement(array $Element)
  {
      static $validName = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';

      static $urlAttributeOf = array(
          'a' => 'href',
          'img' => 'src',
      );

      $tag = $Element['name'];

      if (isset($urlAttributeOf[$tag]))
      {
          $Element = $this->filterUnsafeUrlInAttribute($Element, $urlAttributeOf[$tag]);
      }

      if (empty($Element['attributes']))
      {
          return $Element;
      }

      foreach ($Element['attributes'] as $att => $val)
      {
          # badly parsed attribute name
          $malformed = ! preg_match($validName, $att);

          # the event-handler check only runs for well-formed names
          if ($malformed or self::striAtStart($att, 'on'))
          {
              unset($Element['attributes'][$att]);
          }
      }

      return $Element;
  }
  #
  # Neutralises a URL attribute that does not start with a whitelisted scheme.
  #
  # $Element   - element array; its 'attributes' entry is inspected
  # $attribute - name of the attribute holding the URL (e.g. 'href', 'src')
  #
  # If the attribute value starts (case-insensitively) with one of the
  # prefixes in $this->safeLinksWhitelist, the element is returned untouched.
  # Otherwise every ':' in the value is replaced by '%3A', so the value can
  # no longer be read as carrying a scheme.
  #
  # An element that has no such attribute (or has it set to null) is returned
  # unchanged. Reading the missing key would raise notices and add an empty
  # attribute to the element.
  #
  protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
  {
      if ( ! isset($Element['attributes'][$attribute]))
      {
          return $Element;
      }

      $url = $Element['attributes'][$attribute];

      foreach ($this->safeLinksWhitelist as $scheme)
      {
          if (self::striAtStart($url, $scheme))
          {
              return $Element;
          }
      }

      $Element['attributes'][$attribute] = str_replace(':', '%3A', $url);

      return $Element;
  }
  1253. #
  1254. # Static Methods
  1255. #
  #
  # Escapes HTML special characters in $text, treating it as UTF-8.
  #
  # $allowQuotes - when true, single and double quotes are left as they are
  #                (ENT_NOQUOTES); by default both kinds are escaped
  #                (ENT_QUOTES)
  #
  protected static function escape($text, $allowQuotes = false)
  {
      $flags = $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES;

      return htmlspecialchars($text, $flags, 'UTF-8');
  }
  #
  # Tells whether $string begins with $needle, comparing case-insensitively
  # via strtolower().
  #
  # Returns false when $needle is longer than $string.
  #
  protected static function striAtStart($string, $needle)
  {
      $needleLength = strlen($needle);

      if (strlen($string) < $needleLength)
      {
          return false;
      }

      $prefix = substr($string, 0, $needleLength);

      return strtolower($prefix) === strtolower($needle);
  }
  #
  # Returns the shared instance registered under $name, creating it with
  # `new static()` and remembering it the first time that name is requested.
  #
  static function instance($name = 'default')
  {
      if ( ! isset(self::$instances[$name]))
      {
          self::$instances[$name] = new static();
      }

      return self::$instances[$name];
  }

  # instances handed out by instance(), keyed by name
  private static $instances = array();
  1283. #
  1284. # Fields
  1285. #
  # link/reference definitions; reset to an empty array at the start of text()
  protected $DefinitionData;

  #
  # Read-Only

  # single characters treated as special
  # NOTE(review): presumably the set that may be backslash-escaped - the code
  # using this list is outside this section, confirm there
  protected $specialCharacters = array(
      '\\', '`', '*', '_',
      '{', '}', '[', ']', '(', ')',
      '>', '#', '+', '-', '.', '!', '|',
  );

  # patterns, keyed by marker character, matching text wrapped in a doubled
  # marker (**...** / __...__) at the start of the subject
  protected $StrongRegex = array(
      '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
      '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
  );

  # patterns, keyed by marker character, matching text wrapped in a single
  # marker (*...* / _..._) at the start of the subject
  protected $EmRegex = array(
      '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
      '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
  );

  # regex fragment (no delimiters) for one HTML attribute: a name optionally
  # followed by = and an unquoted, double-quoted or single-quoted value
  protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';

  # names of HTML elements that take no content
  protected $voidElements = array(
      'area', 'base', 'br', 'col', 'command', 'embed', 'hr',
      'img', 'input', 'link', 'meta', 'param', 'source',
  );

  # names of HTML elements regarded as text-level
  protected $textLevelElements = array(
      'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
      'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', 'i',
      'rp', 'del', 'code', 'strike', 'marquee', 'q', 'rt', 'ins',
      'font', 'strong', 's', 'tt', 'kbd', 'mark', 'u', 'xm',
      'sub', 'nobr', 'sup', 'ruby', 'var', 'span', 'wbr', 'time',
  );
  1315. }