123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270 |
- <?php
- namespace Elgg;
/**
 * Helpers for converting UTF-8 text into ASCII-friendly, URL-safe strings.
 *
 * All methods are static and operate on byte strings that are expected to be
 * UTF-8 encoded. The regular expressions deliberately work on bytes (no "u"
 * modifier) so that multibyte sequences are never matched by the ASCII ranges.
 */
class Translit {

    /**
     * Create a lower-cased, separator-joined version of a string for use in a URL.
     *
     * Steps, in order:
     *  1. compose the string to NFC when a working normalizer is available;
     *  2. transliterate accented Latin characters to plain ASCII;
     *  3. unwrap HTML-like tags so their content survives as words;
     *  4. spell out a few currency symbols;
     *  5. delete ASCII punctuation and control characters;
     *  6. lower-case the result;
     *  7. split into words on any run of ASCII non-alphanumerics;
     *  8. urlencode() every word and join the words with $separator.
     *
     * Multibyte characters that have no transliteration are kept and end up
     * percent-encoded by urlencode().
     *
     * @param string $string    A UTF-8 string
     * @param string $separator Glue placed between words (not encoded itself)
     *
     * @return string Empty string when the input contains no word characters
     */
    public static function urlize($string, $separator = '-') {
        // The transliteration map is keyed by precomposed characters, so try
        // to compose first. normalizer_normalize() does not return a string
        // on failure; in that case keep the input untouched.
        if (self::hasNormalizerSupport()) {
            $composed = normalizer_normalize($string);
            if (is_string($composed)) {
                $string = $composed;
            }
        }

        $string = self::transliterateAscii($string);

        // "<tag attr>" becomes " tag attr " so the tag text is kept as words.
        // Closing tags ("</tag>") do not match because "/" is not a letter.
        $string = preg_replace('~<([a-zA-Z][^>]*)>~', ' $1 ', $string);

        // Currency symbols that are not part of the transliteration map.
        $string = strtr($string, array(
            "\xE2\x82\xAC" => ' E ',   // U+20AC euro sign
            "\xC2\xA3" => ' GBP ',     // U+00A3 pound sign
        ));

        // Remove every ASCII byte except 0-9, a-z, A-Z, hyphen, underscore and
        // whitespace (tab, LF, CR, space). Bytes >= 0x80 are left alone.
        $string = preg_replace('~['
            . '\x00-\x08'  // control chars
            . '\x0b\x0c'   // vertical tab, form feed
            . '\x0e-\x1f'  // control chars
            . '\x21-\x2c'  // ! ... ,
            . '\x2e\x2f'   // . and /
            . '\x3a-\x40'  // : ... @
            . '\x5b-\x5e'  // [ ... ^
            . '\x60'       // backtick
            . '\x7b-\x7f'  // { ... DEL
            . ']~', '', $string);

        // Fall back to the byte-wise strtolower() when mbstring is missing.
        $string = is_callable('mb_strtolower') ? mb_strtolower($string, 'UTF-8') : strtolower($string);

        // Word boundaries are runs of ASCII bytes outside 0-9, a-z, A-Z. A
        // negated class such as [^0-9a-zA-Z] cannot be used here because it
        // would also match the bytes of multibyte characters.
        $pattern = '~['
            . '\x00-\x2f'  // controls ... /
            . '\x3a-\x40'  // : ... @
            . '\x5b-\x60'  // [ ... backtick
            . '\x7b-\x7f'  // { ... DEL
            . ']+~';

        $words = preg_split($pattern, $string, -1, PREG_SPLIT_NO_EMPTY);

        // Percent-encode whatever non-ASCII bytes are left in each word.
        $words = array_map('urlencode', $words);

        return implode($separator, $words);
    }

    /**
     * Replace accented Latin characters with their closest ASCII equivalents.
     *
     * Characters that are not listed in getAsciiTranslitMap() are returned
     * unchanged. Letter case is preserved.
     *
     * @param string $utf8 A UTF-8 string
     *
     * @return string
     */
    public static function transliterateAscii($utf8) {
        // Built on first use and then reused for the rest of the request.
        static $map = null;

        // Nothing to do when the string has no bytes outside 7-bit ASCII.
        if (!preg_match('/[\x80-\xff]/', $utf8)) {
            return $utf8;
        }

        if (null === $map) {
            $map = self::getAsciiTranslitMap();
        }

        return strtr($utf8, $map);
    }

    /**
     * Get the transliteration table used by transliterateAscii().
     *
     * @return array Map of UTF-8 byte sequences (precomposed characters) to
     *               ASCII replacement strings
     */
    public static function getAsciiTranslitMap() {
        return array(
            // Latin-1 Supplement (U+00AA - U+00FF)
            "\xC2\xAA" => 'a', "\xC2\xBA" => 'o', "\xC3\x80" => 'A',
            "\xC3\x81" => 'A', "\xC3\x82" => 'A', "\xC3\x83" => 'A',
            "\xC3\x84" => 'A', "\xC3\x85" => 'A', "\xC3\x86" => 'AE',
            "\xC3\x87" => 'C', "\xC3\x88" => 'E', "\xC3\x89" => 'E',
            "\xC3\x8A" => 'E', "\xC3\x8B" => 'E', "\xC3\x8C" => 'I',
            "\xC3\x8D" => 'I', "\xC3\x8E" => 'I', "\xC3\x8F" => 'I',
            "\xC3\x90" => 'D', "\xC3\x91" => 'N', "\xC3\x92" => 'O',
            "\xC3\x93" => 'O', "\xC3\x94" => 'O', "\xC3\x95" => 'O',
            "\xC3\x96" => 'O', "\xC3\x99" => 'U', "\xC3\x9A" => 'U',
            "\xC3\x9B" => 'U', "\xC3\x9C" => 'U', "\xC3\x9D" => 'Y',
            "\xC3\x9E" => 'TH', "\xC3\x9F" => 'ss', "\xC3\xA0" => 'a',
            "\xC3\xA1" => 'a', "\xC3\xA2" => 'a', "\xC3\xA3" => 'a',
            "\xC3\xA4" => 'a', "\xC3\xA5" => 'a', "\xC3\xA6" => 'ae',
            "\xC3\xA7" => 'c', "\xC3\xA8" => 'e', "\xC3\xA9" => 'e',
            "\xC3\xAA" => 'e', "\xC3\xAB" => 'e', "\xC3\xAC" => 'i',
            "\xC3\xAD" => 'i', "\xC3\xAE" => 'i', "\xC3\xAF" => 'i',
            "\xC3\xB0" => 'd', "\xC3\xB1" => 'n', "\xC3\xB2" => 'o',
            "\xC3\xB3" => 'o', "\xC3\xB4" => 'o', "\xC3\xB5" => 'o',
            "\xC3\xB6" => 'o', "\xC3\xB8" => 'o', "\xC3\xB9" => 'u',
            "\xC3\xBA" => 'u', "\xC3\xBB" => 'u', "\xC3\xBC" => 'u',
            "\xC3\xBD" => 'y', "\xC3\xBE" => 'th', "\xC3\xBF" => 'y',
            "\xC3\x98" => 'O',

            // Latin Extended-A (U+0100 - U+017F)
            "\xC4\x80" => 'A', "\xC4\x81" => 'a', "\xC4\x82" => 'A',
            "\xC4\x83" => 'a', "\xC4\x84" => 'A', "\xC4\x85" => 'a',
            "\xC4\x86" => 'C', "\xC4\x87" => 'c', "\xC4\x88" => 'C',
            "\xC4\x89" => 'c', "\xC4\x8A" => 'C', "\xC4\x8B" => 'c',
            "\xC4\x8C" => 'C', "\xC4\x8D" => 'c', "\xC4\x8E" => 'D',
            "\xC4\x8F" => 'd', "\xC4\x90" => 'D', "\xC4\x91" => 'd',
            "\xC4\x92" => 'E', "\xC4\x93" => 'e', "\xC4\x94" => 'E',
            "\xC4\x95" => 'e', "\xC4\x96" => 'E', "\xC4\x97" => 'e',
            "\xC4\x98" => 'E', "\xC4\x99" => 'e', "\xC4\x9A" => 'E',
            "\xC4\x9B" => 'e', "\xC4\x9C" => 'G', "\xC4\x9D" => 'g',
            "\xC4\x9E" => 'G', "\xC4\x9F" => 'g', "\xC4\xA0" => 'G',
            "\xC4\xA1" => 'g', "\xC4\xA2" => 'G', "\xC4\xA3" => 'g',
            "\xC4\xA4" => 'H', "\xC4\xA5" => 'h', "\xC4\xA6" => 'H',
            "\xC4\xA7" => 'h', "\xC4\xA8" => 'I', "\xC4\xA9" => 'i',
            "\xC4\xAA" => 'I', "\xC4\xAB" => 'i', "\xC4\xAC" => 'I',
            "\xC4\xAD" => 'i', "\xC4\xAE" => 'I', "\xC4\xAF" => 'i',
            "\xC4\xB0" => 'I', "\xC4\xB1" => 'i', "\xC4\xB2" => 'IJ',
            "\xC4\xB3" => 'ij', "\xC4\xB4" => 'J', "\xC4\xB5" => 'j',
            "\xC4\xB6" => 'K', "\xC4\xB7" => 'k', "\xC4\xB8" => 'k',
            "\xC4\xB9" => 'L', "\xC4\xBA" => 'l', "\xC4\xBB" => 'L',
            "\xC4\xBC" => 'l', "\xC4\xBD" => 'L', "\xC4\xBE" => 'l',
            "\xC4\xBF" => 'L', "\xC5\x80" => 'l', "\xC5\x81" => 'L',
            "\xC5\x82" => 'l', "\xC5\x83" => 'N', "\xC5\x84" => 'n',
            "\xC5\x85" => 'N', "\xC5\x86" => 'n', "\xC5\x87" => 'N',
            // U+0149 is a small letter, U+014A is capital eng and U+014B is
            // small eng; the replacements follow the case of the source.
            "\xC5\x88" => 'n', "\xC5\x89" => 'n', "\xC5\x8A" => 'N',
            "\xC5\x8B" => 'n', "\xC5\x8C" => 'O', "\xC5\x8D" => 'o',
            "\xC5\x8E" => 'O', "\xC5\x8F" => 'o', "\xC5\x90" => 'O',
            "\xC5\x91" => 'o', "\xC5\x92" => 'OE', "\xC5\x93" => 'oe',
            "\xC5\x94" => 'R', "\xC5\x95" => 'r', "\xC5\x96" => 'R',
            "\xC5\x97" => 'r', "\xC5\x98" => 'R', "\xC5\x99" => 'r',
            "\xC5\x9A" => 'S', "\xC5\x9B" => 's', "\xC5\x9C" => 'S',
            "\xC5\x9D" => 's', "\xC5\x9E" => 'S', "\xC5\x9F" => 's',
            "\xC5\xA0" => 'S', "\xC5\xA1" => 's', "\xC5\xA2" => 'T',
            "\xC5\xA3" => 't', "\xC5\xA4" => 'T', "\xC5\xA5" => 't',
            "\xC5\xA6" => 'T', "\xC5\xA7" => 't', "\xC5\xA8" => 'U',
            "\xC5\xA9" => 'u', "\xC5\xAA" => 'U', "\xC5\xAB" => 'u',
            "\xC5\xAC" => 'U', "\xC5\xAD" => 'u', "\xC5\xAE" => 'U',
            "\xC5\xAF" => 'u', "\xC5\xB0" => 'U', "\xC5\xB1" => 'u',
            "\xC5\xB2" => 'U', "\xC5\xB3" => 'u', "\xC5\xB4" => 'W',
            "\xC5\xB5" => 'w', "\xC5\xB6" => 'Y', "\xC5\xB7" => 'y',
            "\xC5\xB8" => 'Y', "\xC5\xB9" => 'Z', "\xC5\xBA" => 'z',
            "\xC5\xBB" => 'Z', "\xC5\xBC" => 'z', "\xC5\xBD" => 'Z',
            "\xC5\xBE" => 'z', "\xC5\xBF" => 's',

            // S and T with comma below (U+0218 - U+021B)
            "\xC8\x98" => 'S', "\xC8\x99" => 's',
            "\xC8\x9A" => 'T', "\xC8\x9B" => 't',

            // O and U with horn (U+01A0, U+01A1, U+01AF, U+01B0)
            "\xC6\xA0" => 'O', "\xC6\xA1" => 'o',
            "\xC6\xAF" => 'U', "\xC6\xB0" => 'u',

            // Latin Extended Additional: vowels with grave accent
            "\xE1\xBA\xA6" => 'A', "\xE1\xBA\xA7" => 'a',
            "\xE1\xBA\xB0" => 'A', "\xE1\xBA\xB1" => 'a',
            "\xE1\xBB\x80" => 'E', "\xE1\xBB\x81" => 'e',
            "\xE1\xBB\x92" => 'O', "\xE1\xBB\x93" => 'o',
            "\xE1\xBB\x9C" => 'O', "\xE1\xBB\x9D" => 'o',
            "\xE1\xBB\xAA" => 'U', "\xE1\xBB\xAB" => 'u',
            "\xE1\xBB\xB2" => 'Y', "\xE1\xBB\xB3" => 'y',

            // Latin Extended Additional: vowels with hook above
            "\xE1\xBA\xA2" => 'A', "\xE1\xBA\xA3" => 'a',
            "\xE1\xBA\xA8" => 'A', "\xE1\xBA\xA9" => 'a',
            "\xE1\xBA\xB2" => 'A', "\xE1\xBA\xB3" => 'a',
            "\xE1\xBA\xBA" => 'E', "\xE1\xBA\xBB" => 'e',
            "\xE1\xBB\x82" => 'E', "\xE1\xBB\x83" => 'e',
            "\xE1\xBB\x88" => 'I', "\xE1\xBB\x89" => 'i',
            "\xE1\xBB\x8E" => 'O', "\xE1\xBB\x8F" => 'o',
            "\xE1\xBB\x94" => 'O', "\xE1\xBB\x95" => 'o',
            "\xE1\xBB\x9E" => 'O', "\xE1\xBB\x9F" => 'o',
            "\xE1\xBB\xA6" => 'U', "\xE1\xBB\xA7" => 'u',
            "\xE1\xBB\xAC" => 'U', "\xE1\xBB\xAD" => 'u',
            "\xE1\xBB\xB6" => 'Y', "\xE1\xBB\xB7" => 'y',

            // Latin Extended Additional: vowels with tilde
            "\xE1\xBA\xAA" => 'A', "\xE1\xBA\xAB" => 'a',
            "\xE1\xBA\xB4" => 'A', "\xE1\xBA\xB5" => 'a',
            "\xE1\xBA\xBC" => 'E', "\xE1\xBA\xBD" => 'e',
            "\xE1\xBB\x84" => 'E', "\xE1\xBB\x85" => 'e',
            "\xE1\xBB\x96" => 'O', "\xE1\xBB\x97" => 'o',
            "\xE1\xBB\xA0" => 'O', "\xE1\xBB\xA1" => 'o',
            "\xE1\xBB\xAE" => 'U', "\xE1\xBB\xAF" => 'u',
            "\xE1\xBB\xB8" => 'Y', "\xE1\xBB\xB9" => 'y',

            // Latin Extended Additional: vowels with acute accent
            "\xE1\xBA\xA4" => 'A', "\xE1\xBA\xA5" => 'a',
            "\xE1\xBA\xAE" => 'A', "\xE1\xBA\xAF" => 'a',
            "\xE1\xBA\xBE" => 'E', "\xE1\xBA\xBF" => 'e',
            "\xE1\xBB\x90" => 'O', "\xE1\xBB\x91" => 'o',
            "\xE1\xBB\x9A" => 'O', "\xE1\xBB\x9B" => 'o',
            "\xE1\xBB\xA8" => 'U', "\xE1\xBB\xA9" => 'u',

            // Latin Extended Additional: vowels with dot below
            "\xE1\xBA\xA0" => 'A', "\xE1\xBA\xA1" => 'a',
            "\xE1\xBA\xAC" => 'A', "\xE1\xBA\xAD" => 'a',
            "\xE1\xBA\xB6" => 'A', "\xE1\xBA\xB7" => 'a',
            "\xE1\xBA\xB8" => 'E', "\xE1\xBA\xB9" => 'e',
            "\xE1\xBB\x86" => 'E', "\xE1\xBB\x87" => 'e',
            "\xE1\xBB\x8A" => 'I', "\xE1\xBB\x8B" => 'i',
            "\xE1\xBB\x8C" => 'O', "\xE1\xBB\x8D" => 'o',
            "\xE1\xBB\x98" => 'O', "\xE1\xBB\x99" => 'o',
            "\xE1\xBB\xA2" => 'O', "\xE1\xBB\xA3" => 'o',
            "\xE1\xBB\xA4" => 'U', "\xE1\xBB\xA5" => 'u',
            "\xE1\xBB\xB0" => 'U', "\xE1\xBB\xB1" => 'u',
            "\xE1\xBB\xB4" => 'Y', "\xE1\xBB\xB5" => 'y',
        );
    }

    /**
     * Tell whether normalizer_normalize() exists and actually composes characters.
     *
     * The probe normalizes "A" followed by U+030A (combining ring above) and
     * expects the precomposed U+00C5 back. The answer is computed once and
     * cached in a static variable.
     *
     * @return bool
     */
    public static function hasNormalizerSupport() {
        static $ret = null;

        if (null === $ret) {
            $form_c = "\xC3\x85";   // precomposed U+00C5
            $form_d = "A\xCC\x8A";  // "A" + U+030A combining ring above
            $ret = (function_exists('normalizer_normalize')
                && $form_c === normalizer_normalize($form_d));
        }

        return $ret;
    }
}
|