浏览代码

If PHP recode functions are used for decoding, they may emit named HTML
character entities, which the encoding functions cannot handle. It is easier
to convert all named entities to numeric entities in one place than to detect
the supported entities in every encoding function.

tokul 19 年之前
父节点
当前提交
f79fb927ee
共有 1 个文件被更改,包括 346 次插入64 次删除
  1. 346 64
      functions/i18n.php

+ 346 - 64
functions/i18n.php

@@ -244,6 +244,7 @@ function charset_encode($string,$charset,$htmlencode=true) {
  */
 function charset_convert($in_charset,$string,$out_charset,$htmlencode=true) {
     $string=charset_decode($in_charset,$string,true);
+    $string=sqi18n_convert_entities($string);
     $string=charset_encode($string,$out_charset,$htmlencode);
     return $string;
 }
@@ -588,79 +589,360 @@ endswitch;
  * @return bool is it possible to convert to user's charset
  */
 function is_conversion_safe($input_charset) {
-  global $languages, $sm_notAlias, $default_charset, $lossy_encoding;
+    global $languages, $sm_notAlias, $default_charset, $lossy_encoding;
 
     if (isset($lossy_encoding) && $lossy_encoding )
         return true;
 
- // convert to lower case
- $input_charset = strtolower($input_charset);
+    // convert to lower case
+    $input_charset = strtolower($input_charset);
 
- // Is user's locale Unicode based ?
- if ( $default_charset == "utf-8" ) {
-   return true;
- }
-
- // Charsets that are similar
-switch ($default_charset):
-case "windows-1251":
-      if ( $input_charset == "iso-8859-5" ||
-           $input_charset == "koi8-r" ||
-           $input_charset == "koi8-u" ) {
+    // Is user's locale Unicode based ?
+    if ( $default_charset == "utf-8" ) {
         return true;
-     } else {
+    }
+
+    // Charsets that are similar
+    switch ($default_charset) {
+    case "windows-1251":
+        if ( $input_charset == "iso-8859-5" ||
+             $input_charset == "koi8-r" ||
+             $input_charset == "koi8-u" ) {
+            return true;
+        } else {
+            return false;
+        }
+    case "windows-1257":
+        if ( $input_charset == "iso-8859-13" ||
+             $input_charset == "iso-8859-4" ) {
+            return true;
+        } else {
+            return false;
+        }
+    case "iso-8859-4":
+        if ( $input_charset == "iso-8859-13" ||
+             $input_charset == "windows-1257" ) {
+            return true;
+        } else {
+            return false;
+        }
+    case "iso-8859-5":
+        if ( $input_charset == "windows-1251" ||
+             $input_charset == "koi8-r" ||
+             $input_charset == "koi8-u" ) {
+            return true;
+        } else {
+            return false;
+        }
+    case "iso-8859-13":
+        if ( $input_charset == "iso-8859-4" ||
+             $input_charset == "windows-1257" ) {
+            return true;
+        } else {
+            return false;
+        }
+    case "koi8-r":
+        if ( $input_charset == "windows-1251" ||
+             $input_charset == "iso-8859-5" ||
+             $input_charset == "koi8-u" ) {
+            return true;
+        } else {
+            return false;
+        }
+    case "koi8-u":
+        if ( $input_charset == "windows-1251" ||
+             $input_charset == "iso-8859-5" ||
+             $input_charset == "koi8-r" ) {
+            return true;
+        } else {
+            return false;
+        }
+    default:
         return false;
-     }
-case "windows-1257":
-  if ( $input_charset == "iso-8859-13" ||
-       $input_charset == "iso-8859-4" ) {
-    return true;
-  } else {
-    return false;
-  }
-case "iso-8859-4":
-  if ( $input_charset == "iso-8859-13" ||
-       $input_charset == "windows-1257" ) {
-     return true;
-  } else {
-     return false;
-  }
-case "iso-8859-5":
-  if ( $input_charset == "windows-1251" ||
-       $input_charset == "koi8-r" ||
-       $input_charset == "koi8-u" ) {
-     return true;
-  } else {
-     return false;
-  }
-case "iso-8859-13":
-  if ( $input_charset == "iso-8859-4" ||
-       $input_charset == "windows-1257" ) {
-     return true;
-  } else {
-     return false;
-  }
-case "koi8-r":
-  if ( $input_charset == "windows-1251" ||
-       $input_charset == "iso-8859-5" ||
-       $input_charset == "koi8-u" ) {
-     return true;
-  } else {
-     return false;
-  }
-case "koi8-u":
-  if ( $input_charset == "windows-1251" ||
-       $input_charset == "iso-8859-5" ||
-       $input_charset == "koi8-r" ) {
-     return true;
-  } else {
-     return false;
-  }
-default:
-   return false;
-endswitch;
+    }
 }
 
+/**
+ * Converts html character entities to numeric entities
+ * 
+ * SquirrelMail encoding functions work only with numeric entities.
+ * This function fixes issues with decoding functions that might convert
+ * some symbols to character entities. Issue is specific to PHP recode 
+ * extension decoding. Function is used internally in charset_convert()
+ * function.
+ * @param string $str string that might contain html character entities
+ * @return string string with character entities converted to decimals. 
+ * @since 1.5.2
+ */
+function sqi18n_convert_entities($str) {
+
+    $entities = array(
+        // Latin 1
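+        '&nbsp;'   => '&#160;',
+        '&iexcl;'  => '&#161;',
+        '&cent;'   => '&#162;',
+        '&pound;'  => '&#163;',
+        '&curren;' => '&#164;',
+        '&yen;'    => '&#165;',
+        '&brvbar;' => '&#166;',
+        '&sect;'   => '&#167;',
+        '&uml;'    => '&#168;',
+        '&copy;'   => '&#169;',
+        '&ordf;'   => '&#170;',
+        '&laquo;'  => '&#171;',
+        '&not;'    => '&#172;',
+        '&shy;'    => '&#173;',
+        '&reg;'    => '&#174;',
+        '&macr;'   => '&#175;',
+        '&deg;'    => '&#176;',
+        '&plusmn;' => '&#177;',
+        '&sup2;'   => '&#178;',
+        '&sup3;'   => '&#179;',
+        '&acute;'  => '&#180;',
+        '&micro;'  => '&#181;',
+        '&para;'   => '&#182;',
+        '&middot;' => '&#183;',
+        '&cedil;'  => '&#184;',
+        '&sup1;'   => '&#185;',
+        '&ordm;'   => '&#186;',
+        '&raquo;'  => '&#187;',
+        '&frac14;' => '&#188;',
+        '&frac12;' => '&#189;',
+        '&frac34;' => '&#190;',
+        '&iquest;' => '&#191;',
+        '&Agrave;' => '&#192;',
+        '&Aacute;' => '&#193;',
+        '&Acirc;'  => '&#194;',
+        '&Atilde;' => '&#195;',
+        '&Auml;'   => '&#196;',
+        '&Aring;'  => '&#197;',
+        '&AElig;'  => '&#198;',
+        '&Ccedil;' => '&#199;',
+        '&Egrave;' => '&#200;',
+        '&Eacute;' => '&#201;',
+        '&Ecirc;'  => '&#202;',
+        '&Euml;'   => '&#203;',
+        '&Igrave;' => '&#204;',
+        '&Iacute;' => '&#205;',
+        '&Icirc;'  => '&#206;',
+        '&Iuml;'   => '&#207;',
+        '&ETH;'    => '&#208;',
+        '&Ntilde;' => '&#209;',
+        '&Ograve;' => '&#210;',
+        '&Oacute;' => '&#211;',
+        '&Ocirc;'  => '&#212;',
+        '&Otilde;' => '&#213;',
+        '&Ouml;'   => '&#214;',
+        '&times;'  => '&#215;',
+        '&Oslash;' => '&#216;',
+        '&Ugrave;' => '&#217;',
+        '&Uacute;' => '&#218;',
+        '&Ucirc;'  => '&#219;',
+        '&Uuml;'   => '&#220;',
+        '&Yacute;' => '&#221;',
+        '&THORN;'  => '&#222;',
+        '&szlig;'  => '&#223;',
+        '&agrave;' => '&#224;',
+        '&aacute;' => '&#225;',
+        '&acirc;'  => '&#226;',
+        '&atilde;' => '&#227;',
+        '&auml;'   => '&#228;',
+        '&aring;'  => '&#229;',
+        '&aelig;'  => '&#230;',
+        '&ccedil;' => '&#231;',
+        '&egrave;' => '&#232;',
+        '&eacute;' => '&#233;',
+        '&ecirc;'  => '&#234;',
+        '&euml;'   => '&#235;',
+        '&igrave;' => '&#236;',
+        '&iacute;' => '&#237;',
+        '&icirc;'  => '&#238;',
+        '&iuml;'   => '&#239;',
+        '&eth;'    => '&#240;',
+        '&ntilde;' => '&#241;',
+        '&ograve;' => '&#242;',
+        '&oacute;' => '&#243;',
+        '&ocirc;'  => '&#244;',
+        '&otilde;' => '&#245;',
+        '&ouml;'   => '&#246;',
+        '&divide;' => '&#247;',
+        '&oslash;' => '&#248;',
+        '&ugrave;' => '&#249;',
+        '&uacute;' => '&#250;',
+        '&ucirc;'  => '&#251;',
+        '&uuml;'   => '&#252;',
+        '&yacute;' => '&#253;',
+        '&thorn;'  => '&#254;',
+        '&yuml;'   => '&#255;',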
+        // Latin Extended-A
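+        '&OElig;'  => '&#338;',
+        '&oelig;'  => '&#339;',
+        '&Scaron;' => '&#352;',
+        '&scaron;' => '&#353;',
+        '&Yuml;'   => '&#376;',
+        // Spacing Modifier Letters
+        '&circ;'   => '&#710;',
+        '&tilde;'  => '&#732;',
+        // General Punctuation
+        '&ensp;'   => '&#8194;',
+        '&emsp;'   => '&#8195;',
+        '&thinsp;' => '&#8201;',
+        '&zwnj;'   => '&#8204;',
+        '&zwj;'    => '&#8205;',
+        '&lrm;'    => '&#8206;',
+        '&rlm;'    => '&#8207;',
+        '&ndash;'  => '&#8211;',
+        '&mdash;'  => '&#8212;',
+        '&lsquo;'  => '&#8216;',
+        '&rsquo;'  => '&#8217;',
+        '&sbquo;'  => '&#8218;',
+        '&ldquo;'  => '&#8220;',
+        '&rdquo;'  => '&#8221;',
+        '&bdquo;'  => '&#8222;',
+        '&dagger;' => '&#8224;',
+        '&Dagger;' => '&#8225;',
+        '&permil;' => '&#8240;',
+        '&lsaquo;' => '&#8249;',
+        '&rsaquo;' => '&#8250;',
+        '&euro;'   => '&#8364;',
+        // Latin Extended-B
+        '&fnof;' => '&#402;',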
+        // Greek
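+        '&Alpha;'  => '&#913;',
+        '&Beta;'   => '&#914;',
+        '&Gamma;'  => '&#915;',
+        '&Delta;'  => '&#916;',
+        '&Epsilon;' => '&#917;',
+        '&Zeta;'   => '&#918;',
+        '&Eta;'    => '&#919;',
+        '&Theta;'  => '&#920;',
+        '&Iota;'   => '&#921;',
+        '&Kappa;'  => '&#922;',
+        '&Lambda;' => '&#923;',
+        '&Mu;'     => '&#924;',
+        '&Nu;'     => '&#925;',
+        '&Xi;'     => '&#926;',
+        '&Omicron;' => '&#927;',
+        '&Pi;'     => '&#928;',
+        '&Rho;'    => '&#929;',
+        '&Sigma;'  => '&#931;',
+        '&Tau;'    => '&#932;',
+        '&Upsilon;' => '&#933;',
+        '&Phi;'    => '&#934;',
+        '&Chi;'    => '&#935;',
+        '&Psi;'    => '&#936;',
+        '&Omega;'  => '&#937;',
+        '&alpha;'  => '&#945;',
+        '&beta;'   => '&#946;',
+        '&gamma;'  => '&#947;',
+        '&delta;'  => '&#948;',
+        '&epsilon;' => '&#949;',
+        '&zeta;'   => '&#950;',
+        '&eta;'    => '&#951;',
+        '&theta;'  => '&#952;',
+        '&iota;'   => '&#953;',
+        '&kappa;'  => '&#954;',
+        '&lambda;' => '&#955;',
+        '&mu;'     => '&#956;',
+        '&nu;'     => '&#957;',
+        '&xi;'     => '&#958;',
+        '&omicron;' => '&#959;',
+        '&pi;'     => '&#960;',
+        '&rho;'    => '&#961;',
+        '&sigmaf;' => '&#962;',
+        '&sigma;'  => '&#963;',
+        '&tau;'    => '&#964;',
+        '&upsilon;' => '&#965;',
+        '&phi;'    => '&#966;',
+        '&chi;'    => '&#967;',
+        '&psi;'    => '&#968;',
+        '&omega;'  => '&#969;',
+        '&thetasym;' => '&#977;',
+        '&upsih;'  => '&#978;',
+        '&piv;'    => '&#982;',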
+        // General Punctuation
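+        '&bull;'   => '&#8226;',
+        '&hellip;' => '&#8230;',
+        '&prime;'  => '&#8242;',
+        '&Prime;'  => '&#8243;',
+        '&oline;'  => '&#8254;',
+        '&frasl;'  => '&#8260;',
+        // Letterlike Symbols
+        '&weierp;' => '&#8472;',
+        '&image;'  => '&#8465;',
+        '&real;'   => '&#8476;',
+        '&trade;'  => '&#8482;',
+        '&alefsym;' => '&#8501;',
+        // Arrows
+        '&larr;'   => '&#8592;',
+        '&uarr;'   => '&#8593;',
+        '&rarr;'   => '&#8594;',
+        '&darr;'   => '&#8595;',
+        '&harr;'   => '&#8596;',
+        '&crarr;'  => '&#8629;',
+        '&lArr;'   => '&#8656;',
+        '&uArr;'   => '&#8657;',
+        '&rArr;'   => '&#8658;',
+        '&dArr;'   => '&#8659;',
+        '&hArr;'   => '&#8660;',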
+        // Mathematical Operators
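+        '&forall;' => '&#8704;',
+        '&part;'   => '&#8706;',
+        '&exist;'  => '&#8707;',
+        '&empty;'  => '&#8709;',
+        '&nabla;'  => '&#8711;',
+        '&isin;'   => '&#8712;',
+        '&notin;'  => '&#8713;',
+        '&ni;'     => '&#8715;',
+        '&prod;'   => '&#8719;',
+        '&sum;'    => '&#8721;',
+        '&minus;'  => '&#8722;',
+        '&lowast;' => '&#8727;',
+        '&radic;'  => '&#8730;',
+        '&prop;'   => '&#8733;',
+        '&infin;'  => '&#8734;',
+        '&ang;'    => '&#8736;',
+        '&and;'    => '&#8743;',
+        '&or;'     => '&#8744;',
+        '&cap;'    => '&#8745;',
+        '&cup;'    => '&#8746;',
+        '&int;'    => '&#8747;',
+        '&there4;' => '&#8756;',
+        '&sim;'    => '&#8764;',
+        '&cong;'   => '&#8773;',
+        '&asymp;'  => '&#8776;',
+        '&ne;'     => '&#8800;',
+        '&equiv;'  => '&#8801;',
+        '&le;'     => '&#8804;',
+        '&ge;'     => '&#8805;',
+        '&sub;'    => '&#8834;',
+        '&sup;'    => '&#8835;',
+        '&nsub;'   => '&#8836;',
+        '&sube;'   => '&#8838;',
+        '&supe;'   => '&#8839;',
+        '&oplus;'  => '&#8853;',
+        '&otimes;' => '&#8855;',
+        '&perp;'   => '&#8869;',
+        '&sdot;'   => '&#8901;',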
+        // Miscellaneous Technical
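+        '&lceil;'  => '&#8968;',
+        '&rceil;'  => '&#8969;',
+        '&lfloor;' => '&#8970;',
+        '&rfloor;' => '&#8971;',
+        '&lang;'   => '&#9001;',
+        '&rang;'   => '&#9002;',
+        // Geometric Shapes
+        '&loz;'    => '&#9674;',
+        // Miscellaneous Symbols
+        '&spades;' => '&#9824;',
+        '&clubs;'  => '&#9827;',
+        '&hearts;' => '&#9829;',
+        '&diams;'  => '&#9830;');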
+
+    $str = str_replace(array_keys($entities), array_values($entities), $str);
+
+    return $str;
+}
 
 /* ------------------------------ main --------------------------- */