<?php
 
/**
 * Create wrapper P and BR elements in HTML depending on newlines. Useful when
 * users use newlines to signal line and paragraph breaks. In all cases output
 * should be well-formed markup.
 *
 * In DIV elements, Ps are only added when there would be at
 * least two of them.
 *
 * @package    Elgg.Core
 * @subpackage Output
 */
class ElggAutoP {

	/**
	 * Character set declared in the META tag of the wrapper document that is
	 * handed to the HTML parser.
	 *
	 * @var string
	 */
	public $encoding = 'UTF-8';

	/**
	 * Document currently being processed
	 *
	 * @var DOMDocument
	 */
	protected $_doc = null;

	/**
	 * XPath helper bound to $_doc
	 *
	 * @var DOMXPath
	 */
	protected $_xpath = null;

	/**
	 * Names of elements treated as block-level. Declared as a whitespace
	 * separated string; the constructor splits it into an array of names.
	 *
	 * @var string|string[]
	 */
	protected $_blocks = 'address article area aside blockquote caption col colgroup dd 
		details div dl dt fieldset figure figcaption footer form h1 h2 h3 h4 h5 h6 header 
		hr hgroup legend map math menu nav noscript p pre section select style summary
		table tbody td tfoot th thead tr ul ol option li';

	/**
	 * Names of inline elements. Declared as a whitespace separated string; the
	 * constructor splits it into an array of names.
	 *
	 * @var string|string[]
	 */
	protected $_inlines = 'a abbr audio b button canvas caption cite code command datalist
		del dfn em embed i iframe img input ins kbd keygen label map mark meter object
		output progress q rp rt ruby s samp script select small source span strong style
		sub sup textarea time var video wbr';

	/**
	 * Descend into these elements to add Ps. Declared as a whitespace separated
	 * string; the constructor splits it into an array of names.
	 *
	 * @var string|string[]
	 */
	protected $_descendList = 'article aside blockquote body details div footer form
		header section';

	/**
	 * Add Ps inside these elements. Declared as a whitespace separated string;
	 * the constructor splits it into an array of names.
	 *
	 * @var string|string[]
	 */
	protected $_alterList = 'article aside blockquote body details div footer header
		section';

	/**
	 * Token (md5 of this file's path) used as a prefix for the temporary
	 * AMP/NL/BR placeholders written into the markup while processing.
	 *
	 * @var string
	 */
	protected $_unique = '';

	/**
	 * Constructor
	 *
	 * Splits the whitespace separated element lists into arrays and computes
	 * the placeholder prefix.
	 */
	public function __construct() {
		$this->_blocks = preg_split('@\\s+@', $this->_blocks);
		$this->_descendList = preg_split('@\\s+@', $this->_descendList);
		$this->_alterList = preg_split('@\\s+@', $this->_alterList);
		$this->_inlines = preg_split('@\\s+@', $this->_inlines);
		$this->_unique = md5(__FILE__);
	}

	/**
	 * Create wrapper P and BR elements in HTML depending on newlines. Useful when
	 * users use newlines to signal line and paragraph breaks. In all cases output
	 * should be well-formed markup.
	 *
	 * In DIV elements, Ps are only added when there would be at least two of
	 * them.
	 *
	 * libxml error buffering is switched on for the duration of the call and
	 * put back to its previous mode before returning.
	 *
	 * @param string $html snippet
	 * @return string|false output or false if parse error occurred
	 */
	public function process($html) {
		// parse to DOM, suppressing loadHTML warnings
		// http://www.php.net/manual/en/domdocument.loadhtml.php#95463
		$previousErrorMode = libxml_use_internal_errors(true);

		$result = $this->convert($html);

		if (!$previousErrorMode) {
			// buffering was enabled only for this call: drop the collected
			// parse errors instead of letting them pile up between calls
			libxml_clear_errors();
		}
		libxml_use_internal_errors($previousErrorMode);

		return $result;
	}

	/**
	 * Run the conversion proper: parse, wrap inline runs in temporary AUTOP
	 * elements, split them on blank lines, and serialize AUTOPs as Ps.
	 *
	 * @param string $html snippet
	 * @return string|false output or false if parse error occurred
	 */
	protected function convert($html) {
		// normalize whitespace
		$html = str_replace(array("\r\n", "\r"), "\n", $html);

		// allows preserving entities untouched
		$html = str_replace('&', $this->_unique . 'AMP', $html);

		$this->_doc = new DOMDocument();

		$wrapped = "<html><meta http-equiv='content-type' "
				. "content='text/html; charset={$this->encoding}'><body>{$html}</body>"
				. "</html>";
		if (!$this->loadMarkup($wrapped)) {
			return false;
		}

		$this->_xpath = new DOMXPath($this->_doc);

		// start processing at the BODY element
		$body = $this->_xpath->query('//body[1]')->item(0);
		if (!($body instanceof DOMElement)) {
			return false;
		}
		$this->addParagraphs($body);

		// serialize back to HTML
		$html = $this->_doc->saveHTML();

		// Note: we create <autop> elements, which will later be converted to paragraphs

		// split AUTOPs into multiples at /\n\n+/
		$html = preg_replace('/(' . $this->_unique . 'NL){2,}/', '</autop><autop>', $html);
		$html = str_replace(array($this->_unique . 'BR', $this->_unique . 'NL', '<br>'),
				'<br />',
				$html);
		$html = str_replace('<br /></autop>', '</autop>', $html);

		// re-parse so we can handle new AUTOP elements
		if (!$this->loadMarkup($html)) {
			return false;
		}

		// must re-create XPath object after DOM load
		$this->_xpath = new DOMXPath($this->_doc);

		// strip AUTOPs that only have comments/whitespace
		foreach ($this->_xpath->query('//autop') as $autop) {
			/* @var DOMElement $autop */
			$hasContent = false;
			if (trim($autop->textContent) !== '') {
				$hasContent = true;
			} else {
				foreach ($autop->childNodes as $node) {
					if ($node->nodeType === XML_ELEMENT_NODE) {
						$hasContent = true;
						break;
					}
				}
			}
			if (!$hasContent) {
				// mark to be later replaced w/ preg_replace (faster than moving nodes out)
				$autop->setAttribute("r", "1");
			}
		}

		// If a DIV contains a single AUTOP, remove it
		foreach ($this->_xpath->query('//div') as $el) {
			/* @var DOMElement $el */
			$autops = $this->_xpath->query('./autop', $el);
			if ($autops->length === 1) {
				$firstAutop = $autops->item(0);
				/* @var DOMElement $firstAutop */
				$firstAutop->setAttribute("r", "1");
			}
		}

		$html = $this->_doc->saveHTML();

		// trim to the contents of BODY
		$bodyStart = strpos($html, '<body>');
		$bodyEnd = strpos($html, '</body>', $bodyStart + 6);
		$html = substr($html, $bodyStart + 6, $bodyEnd - $bodyStart - 6);

		// strip AUTOPs that should be removed
		$html = preg_replace('@<autop r="1">(.*?)</autop>@', '\\1', $html);

		// commit to converting AUTOPs to Ps
		$html = str_replace('<autop>', "\n<p>", $html);
		$html = str_replace('</autop>', "</p>\n", $html);

		$html = str_replace('<br>', '<br />', $html);
		$html = str_replace($this->_unique . 'AMP', '&', $html);
		return $html;
	}

	/**
	 * Load markup into $_doc with the libxml external entity loader disabled
	 * where that switch is still available.
	 *
	 * libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
	 * only toggled on older runtimes; the previous setting is restored after
	 * the load.
	 *
	 * @param string $markup HTML to parse
	 * @return bool result of DOMDocument::loadHTML()
	 */
	protected function loadMarkup($markup) {
		$toggleLoader = (PHP_VERSION_ID < 80000)
				&& function_exists('libxml_disable_entity_loader');

		$previousLoader = false;
		if ($toggleLoader) {
			// Do not load entities. May be unnecessary, better safe than sorry
			$previousLoader = libxml_disable_entity_loader(true);
		}

		$loaded = $this->_doc->loadHTML($markup);

		if ($toggleLoader) {
			libxml_disable_entity_loader($previousLoader);
		}

		return (bool) $loaded;
	}

	/**
	 * Add P and BR elements as necessary
	 *
	 * Inline content of elements in the alter list is moved into temporary
	 * AUTOP elements and its newlines are replaced with NL placeholders;
	 * newlines inside nested inline elements are replaced with BR placeholders.
	 * process() later turns the placeholders into markup.
	 *
	 * @param DOMElement $el DOM element
	 * @return void
	 */
	protected function addParagraphs(DOMElement $el) {
		// no need to call recursively, just queue up
		$elsToProcess = array($el);
		$inlinesToProcess = array();
		while ($el = array_shift($elsToProcess)) {
			// if true, we can alter all child nodes, if not, we'll just call
			// addParagraphs on each element in the descendInto list
			$alterInline = in_array($el->nodeName, $this->_alterList, true);

			// inside affected elements, we want to trim leading whitespace from
			// the first text node
			$ltrimFirstTextNode = true;

			// should we open a new AUTOP element to move inline elements into?
			$openP = true;
			$autop = null;

			// after BR, ignore a newline
			$isFollowingBr = false;

			$node = $el->firstChild;
			while (null !== $node) {
				if ($alterInline) {
					if ($openP) {
						$openP = false;
						// create a P to move inline content into (this may be removed later)
						$autop = $el->insertBefore($this->_doc->createElement('autop'), $node);
					}
				}

				$isElement = ($node->nodeType === XML_ELEMENT_NODE);
				if ($isElement) {
					$isBlock = in_array($node->nodeName, $this->_blocks, true);
				} else {
					$isBlock = false;
				}

				if ($alterInline) {
					$isText = ($node->nodeType === XML_TEXT_NODE);
					$isLastInline = (! $node->nextSibling
							|| ($node->nextSibling->nodeType === XML_ELEMENT_NODE
								&& in_array($node->nextSibling->nodeName, $this->_blocks, true)));
					if ($isElement) {
						$isFollowingBr = ($node->nodeName === 'br');
					}

					if ($isText) {
						$nodeText = $node->nodeValue;
						if ($ltrimFirstTextNode) {
							$nodeText = ltrim($nodeText);
							$ltrimFirstTextNode = false;
						}
						if ($isFollowingBr && preg_match('@^[ \\t]*\\n[ \\t]*@', $nodeText, $m)) {
							// if a user ends a line with <br>, don't add a second BR
							$nodeText = substr($nodeText, strlen($m[0]));
						}
						if ($isLastInline) {
							$nodeText = rtrim($nodeText);
						}
						$nodeText = str_replace("\n", $this->_unique . 'NL', $nodeText);
						$tmpNode = $node;
						$node = $node->nextSibling; // move loop to next node

						// alter node in place, then move into AUTOP
						$tmpNode->nodeValue = $nodeText;
						$autop->appendChild($tmpNode);

						continue;
					}
				}
				if ($isBlock || ! $node->nextSibling) {
					if ($isBlock) {
						if (in_array($node->nodeName, $this->_descendList, true)) {
							$elsToProcess[] = $node;
						}
					}
					$openP = true;
					$ltrimFirstTextNode = true;
				}
				if ($alterInline) {
					if (! $isBlock) {
						$tmpNode = $node;
						if ($isElement && false !== strpos($tmpNode->textContent, "\n")) {
							$inlinesToProcess[] = $tmpNode;
						}
						$node = $node->nextSibling;
						$autop->appendChild($tmpNode);
						continue;
					}
				}

				$node = $node->nextSibling;
			}
		}

		// handle inline nodes
		// no need to recurse, just queue up
		while ($el = array_shift($inlinesToProcess)) {
			$ignoreLeadingNewline = false;
			foreach ($el->childNodes as $node) {
				if ($node->nodeType === XML_ELEMENT_NODE) {
					// Compare the element NAME: the nodeValue of a BR is its
					// (empty) text content, so testing the value never matched
					// and a newline following <br> produced a second break.
					if ($node->nodeName === 'br') {
						$ignoreLeadingNewline = true;
					} else {
						$ignoreLeadingNewline = false;
						if (false !== strpos($node->textContent, "\n")) {
							$inlinesToProcess[] = $node;
						}
					}
					continue;
				} elseif ($node->nodeType === XML_TEXT_NODE) {
					$text = $node->nodeValue;
					// guard the offset read so an empty text node cannot
					// raise an uninitialized string offset error
					if ($ignoreLeadingNewline && $text !== '' && $text[0] === "\n") {
						$text = substr($text, 1);
						$ignoreLeadingNewline = false;
					}
					$node->nodeValue = str_replace("\n", $this->_unique . 'BR', $text);
				}
			}
		}
	}
}
 
 
  |