Tokenizer.php 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. <?php
  /**
   * Generate a list of ECML tokens.
   *
   * Every tag matching TAG_REGEX is swapped for the DELIMITER marker while a
   * token for it is recorded; the remaining text is then split on the marker
   * and the plain-text pieces are interleaved with the recorded tag tokens in
   * their original order.
   *
   * @access private
   */
  class Elgg_Ecml_Tokenizer {

      /** Pattern for a tag: "[keyword" followed by an optional attribute string and "]". */
      const TAG_REGEX = '~\\[([a-z0-9\\./]+)([^\\]]+)?\\]~';

      /** Character that separates attributes inside a tag. */
      const ATTR_SEPARATOR = ' ';

      /** Character that separates an attribute name from its value. */
      const ATTR_OPERATOR = '=';

      /**
       * Marker substituted for each matched tag before the text is split.
       * NOTE(review): presumably chosen so it never occurs in real input; a text
       * that contains it literally would be split incorrectly.
       */
      const DELIMITER = 'NWMwYjc0ZjhiYTBjYmE2NzgwMmFkZTQzNmYyZDcxMWY3NGFjMDI1ZA';

      /**
       * Tokens recorded by replaceMatch() during the current getTokens() call,
       * in the order the tags appear in the text.
       *
       * @var Elgg_Ecml_Token[]
       */
      protected $replacedTokens;

      /**
       * Split text into plain-text tokens and tag tokens.
       *
       * @param string $text
       * @return Elgg_Ecml_Token[] array of ECML tokens
       */
      public function getTokens($text) {
          $this->replacedTokens = array();

          $marked = preg_replace_callback(self::TAG_REGEX, array($this, 'replaceMatch'), $text);
          if ($marked === null) {
              // preg_replace_callback() reports a PCRE error with null. Rather than
              // silently losing the input, return it as one plain text token.
              $this->replacedTokens = array();
              if ((string) $text === '') {
                  return array();
              }
              return array(Elgg_Ecml_Token::factory($text));
          }

          $pieces = explode(self::DELIMITER, $marked);
          $last = count($pieces) - 1;

          $tokens = array();
          foreach ($pieces as $i => $piece) {
              if ($piece !== '') {
                  $tokens[] = Elgg_Ecml_Token::factory($piece);
              }
              // the i-th delimiter sits between piece i and piece i + 1
              if ($i !== $last) {
                  $tokens[] = $this->replacedTokens[$i];
              }
          }

          $this->replacedTokens = array();
          return $tokens;
      }

      /**
       * preg_replace_callback() handler: record a token for the matched tag and
       * return the delimiter that takes the tag's place in the text.
       *
       * If the attribute string cannot be parsed, the whole match is recorded as
       * a plain text token instead of a tag token.
       *
       * @param array $matches Array of string matches for a particular tag:
       *                       [full tag, keyword, attributes?]
       * @return string The delimiter marker
       */
      protected function replaceMatch($matches) {
          $attributes = array();

          if (isset($matches[2])) {
              $success = true;
              $attributes = $this->tokenizeAttributes($matches[2], $success);
              if (!$success) {
                  // failed to parse attributes, make a plain text token
                  $this->replacedTokens[] = Elgg_Ecml_Token::factory($matches[0]);
                  return self::DELIMITER;
              }
          }

          $this->replacedTokens[] = Elgg_Ecml_Token::factory($matches[0], $matches[1], $attributes);
          return self::DELIMITER;
      }

      /**
       * Tokenize the ECML tag attributes.
       *
       * Attributes are separated by ATTR_SEPARATOR; a name and its value are
       * joined by ATTR_OPERATOR. Names and values may be wrapped in single or
       * double quotes, inside which the quote character can be escaped with a
       * backslash. An attribute without a value is stored as boolean true.
       *
       * @param string $string  Attribute string
       * @param bool   $success Set to false when a quoted section is never
       *                        closed, true otherwise
       * @return array Attribute name => value (empty when parsing failed)
       */
      protected function tokenizeAttributes($string, &$success = null) {
          $success = true;
          $string = trim($string);
          // compare against '' explicitly: empty() would also discard the string "0"
          if ($string === '') {
              return array();
          }

          $attributes = array();
          $pos = 0;
          $char = elgg_substr($string, $pos, 1);

          // working vars for assembling name and values
          $operand = $name = '';

          while ($char !== false && $char !== '') {
              switch ($char) {
                  // handle quoted names/values
                  case '"':
                  case "'":
                      $quote = $char;
                      $next_char = elgg_substr($string, ++$pos, 1);
                      while ($next_char !== $quote) {
                          // the substring helper may return "" or false past the end
                          if ($next_char === false || $next_char === '') {
                              // no matching quote. bail.
                              $success = false;
                              return array();
                          }
                          if ($next_char === '\\') {
                              // allow escaping quotes
                              $after_escape = elgg_substr($string, $pos + 1, 1);
                              if ($after_escape === $quote) {
                                  $operand .= $quote;
                                  $pos += 2; // skip escape and quote
                                  $next_char = elgg_substr($string, $pos, 1);
                                  continue;
                              }
                          }
                          $operand .= $next_char;
                          $next_char = elgg_substr($string, ++$pos, 1);
                      }
                      // $pos now rests on the closing quote
                      break;

                  case self::ATTR_SEPARATOR:
                      $this->setAttribute($operand, $name, $attributes);
                      break;

                  case self::ATTR_OPERATOR:
                      // save name, switch to value
                      $name = $operand;
                      $operand = '';
                      break;

                  default:
                      $operand .= $char;
                      break;
              }

              $char = elgg_substr($string, ++$pos, 1);
          }

          // need to get the last attr
          $this->setAttribute($operand, $name, $attributes);

          return $attributes;
      }

      /**
       * Store the attribute assembled so far and reset the working variables.
       *
       * With a name present, the operand is the value; the strings "true" and
       * "false" are converted to booleans. Without a name, a non-empty operand
       * is itself the name of a valueless attribute and is stored as true. The
       * conversion is applied to values only: used as an array key, a boolean
       * would be cast to the integer 1 or 0.
       *
       * @param mixed  $operand    Value, or name of a valueless attribute; reset to ''
       * @param string $name       Attribute name, '' if none was read; reset to ''
       * @param array  $attributes Attribute map that receives the entry
       * @return void
       */
      protected function setAttribute(&$operand, &$name, &$attributes) {
          if ($name !== '') {
              // normalize true and false
              if ($operand === 'true') {
                  $operand = true;
              } elseif ($operand === 'false') {
                  $operand = false;
              }
              $attributes[$name] = $operand;
              $operand = $name = '';
          } elseif ($operand !== '') {
              // boolean attribute (no value)
              $attributes[$operand] = true;
              $operand = '';
          }
      }
  }