output.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. <?php
  2. /**
  3. * Output functions
  4. * Processing text for output such as pulling out URLs and extracting excerpts
  5. *
  6. * @package Elgg
  7. * @subpackage Core
  8. */
  /**
   * Takes a string and turns any URLs into formatted links
   *
   * Every http(s):// or ftp(s):// URL that is not directly preceded by one of
   * = / " ' is wrapped in an anchor tag with rel="nofollow". A "<wbr />" is
   * inserted after each slash of the visible link text. Trailing punctuation
   * (any run of . ! , ( ) characters) is kept in the output but moved outside
   * of the link.
   *
   * @param string $text The input string
   *
   * @return string The output string with formatted links
   */
  function parse_urls($text) {
      // URI specification: http://www.ietf.org/rfc/rfc3986.txt
      // This varies from the specification in the following ways:
      //  * Supports non-ascii characters
      //  * Cuts periods, exclamation points, commas and parentheses off the end of the URL
      // @todo this causes problems with <attr = "val">
      // must be in <attr="val"> format (no space).
      // By default htmlawed rewrites tags to this format.
      return preg_replace_callback(
          '/(?<![=\/"\'])((ht|f)tps?:\/\/[^\s\r\n\t<>"\']+)/i',
          // A real closure instead of create_function(), which was removed in PHP 8.0.
          function ($matches) {
              $matched = $matches[1];
              $url = rtrim($matched, ".!,()");
              // Everything rtrim() removed is re-appended after the link, so a
              // multi-character tail such as ")." is preserved in full.
              $punc = (string) substr($matched, strlen($url));
              $urltext = str_replace("/", "/<wbr />", $url);
              return "<a href=\"$url\" rel=\"nofollow\">$urltext</a>$punc";
          },
          $text
      );
  }
  /**
   * Create paragraphs from text with line spacing
   *
   * Hands the string to the process() method of the autoP service obtained
   * from _elgg_services() and returns whatever that method returns.
   *
   * @param string $string The string
   *
   * @return string
   **/
  function elgg_autop($string) {
      $auto_p = _elgg_services()->autoP;

      return $auto_p->process($string);
  }
  /**
   * Returns an excerpt.
   *
   * Tags are stripped and the text is trimmed first. If the result already
   * fits within $num_chars it is returned unchanged. Otherwise the text is cut
   * at $num_chars, then cut back to the last space found within that cut (or
   * left at $num_chars when there is no space, as in Japanese text), and
   * "..." is appended.
   *
   * @param string $text      The full text to excerpt
   * @param int    $num_chars Return a string up to $num_chars long
   *
   * @return string
   * @since 1.7.2
   */
  function elgg_get_excerpt($text, $num_chars = 250) {
      $plain = trim(elgg_strip_tags($text));
      $full_length = elgg_strlen($plain);

      // short enough already: nothing to crop
      if ($full_length <= $num_chars) {
          return $plain;
      }

      $cropped = elgg_substr($plain, 0, $num_chars);

      // crop at the last space, or keep the hard cut if there is no space
      $last_space = elgg_strrpos($cropped, ' ', 0);
      $cut_at = ($last_space === false) ? $num_chars : $last_space;
      $cropped = trim(elgg_substr($cropped, 0, $cut_at));

      // mark that text was chopped
      return ($full_length != elgg_strlen($cropped)) ? $cropped . '...' : $cropped;
  }
  /**
   * Handles formatting of ampersands in urls
   *
   * Every "&" that is not already the start of "&amp;" is replaced by "&amp;".
   *
   * @param string $url The URL
   *
   * @return string
   * @since 1.7.1
   */
  function elgg_format_url($url) {
      // an ampersand that is not followed by "amp;"
      $bare_ampersand = '/&(?!amp;)/';
      $formatted = preg_replace($bare_ampersand, '&amp;', $url);

      return $formatted;
  }
  /**
   * Format bytes to a human readable format
   *
   * The size is divided by 1024 until it drops below 1024 or the largest known
   * unit (TB) is reached, then rounded and suffixed with the unit. Sizes beyond
   * the TB range are therefore reported in TB (e.g. "1024 TB") and sizes below
   * one byte are reported in B, instead of producing an output without a unit.
   *
   * @param int $size      File size in bytes to format
   *
   * @param int $precision Precision to round formatting bytes to
   *
   * @return string|false The formatted size, or false if $size is empty or negative
   * @since 1.9.0
   */
  function elgg_format_bytes($size, $precision = 2) {
      if (!$size || $size < 0) {
          return false;
      }

      $suffixes = array('B', 'kB', 'MB', 'GB', 'TB');
      $last_index = count($suffixes) - 1;

      // Repeated division instead of log(): the unit index can never leave the
      // suffix table and exact powers of 1024 are not exposed to float rounding.
      $value = $size;
      $index = 0;
      while ($value >= 1024 && $index < $last_index) {
          $value /= 1024;
          $index++;
      }

      return round($value, $precision) . ' ' . $suffixes[$index];
  }
  /**
   * Converts an associative array into a string of well-formed attributes
   *
   * The array is first passed through _elgg_clean_vars(). Attribute names are
   * lower-cased and values are HTML-escaped (without double encoding).
   *
   * @note usually for HTML, but could be useful for XML too...
   *
   * @param array $attrs An associative array of attr => val pairs
   *
   * @return string HTML attributes to be inserted into a tag (e.g., <tag $attrs>)
   */
  function elgg_format_attributes(array $attrs = array()) {
      if (!(is_array($attrs) && count($attrs))) {
          return '';
      }

      $attrs = _elgg_clean_vars($attrs);
      $attributes = array();

      // deprecated: a raw, pre-formatted attribute string passed as 'js'
      if (isset($attrs['js'])) {
          elgg_deprecated_notice('Use associative array of attr => val pairs instead of $vars[\'js\']', 1.8);

          if (!empty($attrs['js'])) {
              $attributes[] = $attrs['js'];
          }

          unset($attrs['js']);
      }

      foreach ($attrs as $name => $value) {
          $name = strtolower($name);

          // e.g. checked => true ==> checked="checked"
          if ($value === true) {
              $value = $name;
          }

          // null and false mean "do not output this attribute"
          if ($value === null || $value === false) {
              continue;
          }

          // Arrays are joined with spaces, objects are ignored, anything else
          // is used as is:
          //   'entity' => <\ElggObject>,                              // ignored
          //   'class' => array('elgg-input', 'elgg-input-text'),      // imploded with spaces
          //   'style' => array('margin-left:10px;', 'color: #666;'),  // imploded with spaces
          //   'alt' => 'Alt text',                                    // left as is
          if (is_array($value)) {
              $value = implode(' ', $value);
          } elseif (is_object($value)) {
              continue;
          }

          $escaped = htmlspecialchars($value, ENT_QUOTES, 'UTF-8', false);
          $attributes[] = $name . '="' . $escaped . '"';
      }

      return implode(' ', $attributes);
  }
  /**
   * Format an HTML element
   *
   * @param string $tag_name   The element tagName. e.g. "div". This will not be validated.
   *
   * @param array  $attributes The element attributes. This is passed to elgg_format_attributes().
   *
   * @param string $text       The contents of the element. Assumed to be HTML unless encode_text is true.
   *
   * @param array  $options    Options array with keys:
   *
   *   encode_text   => (bool, default false) If true, $text will be HTML-escaped. Already-escaped entities
   *                    will not be double-escaped.
   *
   *   double_encode => (bool, default false) If true, the $text HTML escaping will be allowed to double
   *                    encode HTML entities: '&times;' will become '&amp;times;'
   *
   *   is_void       => (bool) If given, this determines whether the function will return just the open tag.
   *                    Otherwise this will be determined by the tag name according to this list:
   *                    http://www.w3.org/html/wg/drafts/html/master/single-page.html#void-elements
   *
   *   is_xml        => (bool, default false) If true, void elements will be formatted like "<tag />"
   *
   * @return string
   * @throws InvalidArgumentException If $tag_name is not a string
   * @since 1.9.0
   */
  function elgg_format_element($tag_name, array $attributes = array(), $text = '', array $options = array()) {
      if (!is_string($tag_name)) {
          throw new \InvalidArgumentException('$tag_name is required');
      }

      // An explicit is_void option wins; otherwise decide by tag name.
      if (isset($options['is_void'])) {
          $is_void = $options['is_void'];
      } else {
          // from http://www.w3.org/TR/html-markup/syntax.html#syntax-elements
          $void_tags = array(
              'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem',
              'meta', 'param', 'source', 'track', 'wbr'
          );
          $is_void = in_array(strtolower($tag_name), $void_tags);
      }

      if (!empty($options['encode_text'])) {
          $double_encode = !empty($options['double_encode']);
          $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8', $double_encode);
      }

      // Attribute string, prefixed with a space only when it is not empty.
      $attrs = $attributes ? elgg_format_attributes($attributes) : '';
      if ($attrs !== '') {
          $attrs = " $attrs";
      }

      if (!$is_void) {
          return "<{$tag_name}{$attrs}>$text</$tag_name>";
      }

      if (empty($options['is_xml'])) {
          return "<{$tag_name}{$attrs}>";
      }

      return "<{$tag_name}{$attrs} />";
  }
  /**
   * Preps an associative array for use in {@link elgg_format_attributes()}.
   *
   * Removes the 'config', 'url' and 'user' entries and maps the legacy
   * 'internalname' / 'internalid' entries to 'name' / 'id', unless the matching
   * '__ignoreInternalname' / '__ignoreInternalid' flag is set. The legacy
   * entries and the flags are removed from the result.
   *
   * @note This function is called automatically by elgg_format_attributes(). No need to
   *       call it yourself before using elgg_format_attributes().
   *
   * @param array $vars The raw $vars array with all its dirtiness (config, url, etc.)
   *
   * @return array The array, ready to be used in elgg_format_attributes().
   * @access private
   */
  function _elgg_clean_vars(array $vars = array()) {
      unset($vars['config'], $vars['url'], $vars['user']);

      // backwards compatibility code:
      // legacy key => array(replacement key, flag that suppresses the mapping)
      $legacy_keys = array(
          'internalname' => array('name', '__ignoreInternalname'),
          'internalid' => array('id', '__ignoreInternalid'),
      );

      foreach ($legacy_keys as $legacy_key => $mapping) {
          if (!isset($vars[$legacy_key])) {
              continue;
          }

          list($new_key, $ignore_flag) = $mapping;
          if (!isset($vars[$ignore_flag])) {
              $vars[$new_key] = $vars[$legacy_key];
          }

          unset($vars[$legacy_key]);
      }

      // the flags themselves must never end up as attributes
      foreach (array('__ignoreInternalid', '__ignoreInternalname') as $ignore_flag) {
          if (isset($vars[$ignore_flag])) {
              unset($vars[$ignore_flag]);
          }
      }

      return $vars;
  }
  /**
   * Converts shorthand urls to absolute urls.
   *
   * If the url is already absolute or protocol-relative, no change is made.
   * Query-only ("?a=b") and fragment-only ("#target") values as well as
   * "javascript:" and "mailto:" values are also returned unchanged.
   *
   * @example
   * elgg_normalize_url('');                   // 'http://my.site.com/'
   * elgg_normalize_url('dashboard');          // 'http://my.site.com/dashboard'
   * elgg_normalize_url('http://google.com/'); // no change
   * elgg_normalize_url('//google.com/');      // no change
   *
   * @param string $url The URL to normalize
   *
   * @return string The absolute url
   */
  function elgg_normalize_url($url) {
      // see https://bugs.php.net/bug.php?id=51192
      // from the bookmarks save action.
      // On the affected PHP versions dashes are removed before validating.
      $php_5_2_13_and_below = version_compare(PHP_VERSION, '5.2.14', '<');
      $php_5_3_0_to_5_3_2 = version_compare(PHP_VERSION, '5.3.0', '>=') &&
          version_compare(PHP_VERSION, '5.3.3', '<');

      if ($php_5_2_13_and_below || $php_5_3_0_to_5_3_2) {
          $tmp_address = str_replace("-", "", $url);
          $validated = filter_var($tmp_address, FILTER_VALIDATE_URL);
      } else {
          $validated = filter_var($url, FILTER_VALIDATE_URL);
      }

      // work around for handling absolute IRIs (RFC 3987) - see #4190
      // Parenthesised explicitly: "&&" binds tighter than "||", so without the
      // inner parentheses the "!$validated" guard would not cover the https test.
      if (!$validated && (strpos($url, 'http:') === 0 || strpos($url, 'https:') === 0)) {
          $validated = true;
      }

      if ($validated) {
          // all normal URLs including mailto:
          return $url;
      }

      if (preg_match("#^(\#|\?|//)#i", $url)) {
          // '//example.com' (Shortcut for protocol.)
          // '?query=test', #target
          return $url;
      }

      if (stripos($url, 'javascript:') === 0 || stripos($url, 'mailto:') === 0) {
          // 'javascript:' and 'mailto:'
          // Not covered in FILTER_VALIDATE_URL
          return $url;
      }

      if (preg_match("#^[^/]*\.php(\?.*)?$#i", $url)) {
          // 'install.php', 'install.php?step=step'
          return elgg_get_site_url() . $url;
      }

      if (preg_match("#^[^/?]*\.#i", $url)) {
          // 'example.com', 'example.com/subpage'
          return "http://$url";
      }

      // 'page/handler', 'mod/plugin/file.php'
      // trim off any leading / because the site URL is stored
      // with a trailing /
      return elgg_get_site_url() . ltrim($url, '/');
  }
  /**
   * When given a title, returns a version suitable for inclusion in a URL
   *
   * The "format", "friendly:title" plugin hook is triggered first; a truthy
   * hook result is returned as is. Otherwise the title is entity-decoded and
   * passed through \Elgg\Translit::urlize().
   *
   * @param string $title The title
   *
   * @return string The optimized title
   * @since 1.7.2
   */
  function elgg_get_friendly_title($title) {
      // return a URL friendly title to short circuit normal title formatting
      $hook_result = elgg_trigger_plugin_hook('format', 'friendly:title', array('title' => $title), null);
      if ($hook_result) {
          return $hook_result;
      }

      // titles are often stored HTML encoded
      $decoded = html_entity_decode($title, ENT_QUOTES, 'UTF-8');

      return \Elgg\Translit::urlize($decoded);
  }
  /**
   * Formats a UNIX timestamp in a friendly way (eg "less than a minute ago")
   *
   * The "format", "friendly:time" plugin hook is triggered first; a truthy hook
   * result is returned as is. Otherwise the distance between the two timestamps
   * is expressed in minutes, hours or days and translated through elgg_echo()
   * using a "friendlytime[:future]:<unit>[:singular]" language key.
   *
   * @see elgg_view_friendly_time()
   *
   * @param int $time         A UNIX epoch timestamp
   * @param int $current_time Current UNIX epoch timestamp (optional)
   *
   * @return string The friendly time string
   * @since 1.7.2
   */
  function elgg_get_friendly_time($time, $current_time = null) {
      if (!$current_time) {
          $current_time = time();
      }

      // return a time string to short circuit normal time formatting
      $hook_params = array('time' => $time, 'current_time' => $current_time);
      $hook_result = elgg_trigger_plugin_hook('format', 'friendly:time', $hook_params, null);
      if ($hook_result) {
          return $hook_result;
      }

      // negative when $time lies after $current_time
      $delta = (int)$current_time - (int)$time;
      $elapsed = abs($delta);

      $minute = 60;
      $hour = $minute * 60;
      $day = $hour * 24;

      if ($elapsed < $minute) {
          return elgg_echo("friendlytime:justnow");
      }

      // pick the largest unit that fits
      if ($elapsed < $hour) {
          $granularity = ':minutes';
          $unit_seconds = $minute;
      } elseif ($elapsed < $day) {
          $granularity = ':hours';
          $unit_seconds = $hour;
      } else {
          $granularity = ':days';
          $unit_seconds = $day;
      }

      $count = round($elapsed / $unit_seconds);
      if ($count == 0) {
          $count = 1;
      }

      $future = ($delta < 0) ? ':future' : '';
      $singular = ($count == 1) ? ':singular' : '';

      return elgg_echo("friendlytime{$future}{$granularity}{$singular}", array($count));
  }
  /**
   * Returns a human-readable message for PHP's upload error codes
   *
   * UPLOAD_ERR_OK yields an empty string. Every other code is mapped to an
   * "upload:error:<key>" language key ("upload:error:unknown" for codes that
   * are not recognised) which is translated through elgg_echo().
   *
   * @param int $error_code The code as stored in $_FILES['name']['error']
   * @return string
   */
  function elgg_get_friendly_upload_error($error_code) {
      if ($error_code == UPLOAD_ERR_OK) {
          return '';
      }

      $language_keys = array(
          UPLOAD_ERR_INI_SIZE => 'ini_size',
          UPLOAD_ERR_FORM_SIZE => 'form_size',
          UPLOAD_ERR_PARTIAL => 'partial',
          UPLOAD_ERR_NO_FILE => 'no_file',
          UPLOAD_ERR_NO_TMP_DIR => 'no_tmp_dir',
          UPLOAD_ERR_CANT_WRITE => 'cant_write',
          UPLOAD_ERR_EXTENSION => 'extension',
      );

      $key = 'unknown';

      // Loose comparison, checked in declaration order, first match wins.
      foreach ($language_keys as $code => $candidate) {
          if ($error_code == $code) {
              $key = $candidate;
              break;
          }
      }

      return elgg_echo("upload:error:$key");
  }
  /**
   * Strip tags and offer plugins the chance.
   *
   * The string is run through strip_tags() and the result is offered to the
   * "format", "strip_tags" plugin hook. The hook params contain the original
   * string in 'original_string' and the allowed tags in 'allowable_tags'.
   *
   * @param string $string         Formatted string
   * @param string $allowable_tags Optional parameter to specify tags which should not be stripped
   *
   * @return string String run through strip_tags() and any plugin hooks.
   */
  function elgg_strip_tags($string, $allowable_tags = null) {
      $params = array(
          'original_string' => $string,
          'allowable_tags' => $allowable_tags,
      );

      $stripped = strip_tags($string, $allowable_tags);

      return elgg_trigger_plugin_hook('format', 'strip_tags', $params, $stripped);
  }
  /**
   * Apply html_entity_decode() to a string while re-entitising HTML
   * special char entities to prevent them from being decoded back to their
   * unsafe original forms.
   *
   * The five special char entities (&gt; &lt; &amp; &quot; &#039;) are
   * shielded with an extra level of encoding before html_entity_decode() runs
   * and that shielding is undone afterwards, so only other entities end up
   * decoded.
   *
   * Per the original author's notes, this relies on html_entity_decode() not
   * translating entities when doing so leaves behind another entity, e.g.
   * &amp;gt; if decoded would create &gt; which is another entity itself.
   * This seems to escape the usual behaviour where any two paired entities
   * creating a HTML tag are usually decoded, i.e. a lone &gt; is not decoded,
   * but &lt;foo&gt; would be decoded to <foo> since it creates a full tag.
   *
   * Note: This function is poorly explained in the manual - which is really
   * bad given its potential for misuse on user input already escaped elsewhere.
   * Stackoverflow is littered with advice to use this function in the precise
   * way that would lead to user input being capable of injecting arbitrary HTML.
   *
   * @param string $string
   *
   * @return string
   *
   * @author Pádraic Brady
   * @copyright Copyright (c) 2010 Pádraic Brady (http://blog.astrumfutura.com)
   * @license Released under dual-license GPL2/MIT by explicit permission of Pádraic Brady
   *
   * @access private
   */
  function _elgg_html_decode($string) {
      $special = array('&gt;', '&lt;', '&amp;', '&quot;', '&#039;');
      $shielded = array('&amp;gt;', '&amp;lt;', '&amp;amp;', '&amp;quot;', '&amp;#039;');

      // shield the special char entities from the decode step
      $string = str_replace($special, $shielded, $string);

      $string = html_entity_decode($string, ENT_NOQUOTES, 'UTF-8');

      // undo whatever is left of the shielding
      return str_replace($shielded, $special, $string);
  }
  /**
   * Prepares a query string for output by HTML-escaping it.
   *
   * Characters above 127 are converted to HTML entities when the mbstring
   * extension is available and removed otherwise. The result is then passed
   * through htmlspecialchars() (quotes included, no double encoding).
   *
   * @param string $string
   * @return string
   *
   * @access private
   */
  function _elgg_get_display_query($string) {
      // encode <,>,&, quotes and characters above 127
      $display_query = function_exists('mb_convert_encoding')
          ? mb_convert_encoding($string, 'HTML-ENTITIES', 'UTF-8')
          // if no mbstring extension, we just strip characters
          : preg_replace("/[^\x01-\x7F]/", "", $string);

      return htmlspecialchars($display_query, ENT_QUOTES, 'UTF-8', false);
  }
  /**
   * Unit tests for Output
   *
   * Appends the path of the ElggCoreOutputAutoPTest.php test file (below the
   * global $CONFIG->path) to the list of tests.
   *
   * @param string $hook   unit_test
   * @param string $type   system
   * @param mixed  $value  Array of tests
   * @param mixed  $params Params
   *
   * @return array
   * @access private
   */
  function _elgg_output_unit_test($hook, $type, $value, $params) {
      global $CONFIG;

      $test_file = $CONFIG->path . 'engine/tests/ElggCoreOutputAutoPTest.php';
      $value[] = $test_file;

      return $value;
  }
  /**
   * Initialize the output subsystem.
   *
   * Registers _elgg_output_unit_test() as a handler for the "unit_test",
   * "system" plugin hook.
   *
   * @return void
   * @access private
   */
  function _elgg_output_init() {
      $handler = '_elgg_output_unit_test';

      elgg_register_plugin_hook_handler('unit_test', 'system', $handler);
  }
  return function(\Elgg\EventsService $events, \Elgg\HooksRegistrationService $hooks) {
      // Value returned by this file: a callback that registers
      // _elgg_output_init() as a handler for the "init", "system" event.
      $init_handler = '_elgg_output_init';

      $events->registerHandler('init', 'system', $init_handler);
  };