Browse Source

If the PHP recode functions are used for decoding, they may produce HTML
character entities, which the encoding functions can't handle. It is easier to
convert all entities in one place than to look for supported entities in every
encoding function.

tokul 19 years ago
parent
commit
f79fb927ee
1 changed files with 346 additions and 64 deletions
  1. 346 64
      functions/i18n.php

+ 346 - 64
functions/i18n.php

@@ -244,6 +244,7 @@ function charset_encode($string,$charset,$htmlencode=true) {
  */
  */
 function charset_convert($in_charset,$string,$out_charset,$htmlencode=true) {
     // Decode the input from $in_charset first.
     // NOTE(review): the meaning of the third argument (true) is defined by
     // charset_decode(), which is not visible here - confirm before changing.
     $string=charset_decode($in_charset,$string,true);

     // Decoding (notably through the PHP recode extension) may leave named
     // html character entities in the string. The encoding functions only
     // handle numeric entities, so normalize them once here instead of in
     // every encoder.
     $string=sqi18n_convert_entities($string);

     // Encode the normalized string into the requested output charset.
     $string=charset_encode($string,$out_charset,$htmlencode);
     return $string;
 }
@@ -588,79 +589,360 @@ endswitch;
  * @return bool is it possible to convert to user's charset
  * @return bool is it possible to convert to user's charset
  */
  */
 function is_conversion_safe($input_charset) {
     // Reads the configured charset and the optional lossy-encoding switch
     // from the global scope.
     global $default_charset, $lossy_encoding;

     // When lossy encoding is enabled every input charset is accepted.
     if (isset($lossy_encoding) && $lossy_encoding ) {
         return true;
     }

     // convert to lower case
     $input_charset = strtolower($input_charset);

     // Is user's locale Unicode based ?
     if ( $default_charset == "utf-8" ) {
         return true;
     }

     // Charsets that are similar. Conversion is reported as safe only
     // between two different members of the same family.
     $similar_charsets = array(
         // Cyrillic
         array('windows-1251', 'iso-8859-5', 'koi8-r', 'koi8-u'),
         // Baltic
         array('windows-1257', 'iso-8859-4', 'iso-8859-13'));

     foreach ($similar_charsets as $family) {
         if (in_array($default_charset, $family, true)) {
             // An input charset identical to the default one yields false,
             // exactly like the original per-charset comparison lists did.
             // NOTE(review): presumably callers skip conversion in that
             // case - confirm against the call sites.
             return $input_charset !== $default_charset
                 && in_array($input_charset, $family, true);
         }
     }

     // Default charset does not belong to any known family.
     return false;
 }
 
 
/**
 * Converts html character entities to numeric entities
 *
 * SquirrelMail encoding functions work only with numeric entities.
 * This function fixes issues with decoding functions that might convert
 * some symbols to character entities. Issue is specific to PHP recode
 * extension decoding. Function is used internally in charset_convert()
 * function.
 *
 * Only the named entities listed in the table below are rewritten. Any
 * other text, including entities that are not listed (for example
 * &amp;amp; or &amp;lt;), is returned unchanged. Entity names are matched
 * case sensitively, so &amp;Dagger; and &amp;dagger; stay distinct.
 * @param string $str string that might contain html character entities
 * @return string string with character entities converted to decimals.
 * @since 1.5.2
 */
function sqi18n_convert_entities($str) {

    // Named html character entity => decimal numeric character reference
    $entities = array(
        // Latin 1
        '&nbsp;' => '&#160;',
        '&iexcl;' => '&#161;',
        '&cent;' => '&#162;',
        '&pound;' => '&#163;',
        '&curren;' => '&#164;',
        '&yen;' => '&#165;',
        '&brvbar;' => '&#166;',
        '&sect;' => '&#167;',
        '&uml;' => '&#168;',
        '&copy;' => '&#169;',
        '&ordf;' => '&#170;',
        '&laquo;' => '&#171;',
        '&not;' => '&#172;',
        '&shy;' => '&#173;',
        '&reg;' => '&#174;',
        '&macr;' => '&#175;',
        '&deg;' => '&#176;',
        '&plusmn;' => '&#177;',
        '&sup2;' => '&#178;',
        '&sup3;' => '&#179;',
        '&acute;' => '&#180;',
        '&micro;' => '&#181;',
        '&para;' => '&#182;',
        '&middot;' => '&#183;',
        '&cedil;' => '&#184;',
        '&sup1;' => '&#185;',
        '&ordm;' => '&#186;',
        '&raquo;' => '&#187;',
        '&frac14;' => '&#188;',
        '&frac12;' => '&#189;',
        '&frac34;' => '&#190;',
        '&iquest;' => '&#191;',
        '&Agrave;' => '&#192;',
        '&Aacute;' => '&#193;',
        '&Acirc;' => '&#194;',
        '&Atilde;' => '&#195;',
        '&Auml;' => '&#196;',
        '&Aring;' => '&#197;',
        '&AElig;' => '&#198;',
        '&Ccedil;' => '&#199;',
        '&Egrave;' => '&#200;',
        '&Eacute;' => '&#201;',
        '&Ecirc;' => '&#202;',
        '&Euml;' => '&#203;',
        '&Igrave;' => '&#204;',
        '&Iacute;' => '&#205;',
        '&Icirc;' => '&#206;',
        '&Iuml;' => '&#207;',
        '&ETH;' => '&#208;',
        '&Ntilde;' => '&#209;',
        '&Ograve;' => '&#210;',
        '&Oacute;' => '&#211;',
        '&Ocirc;' => '&#212;',
        '&Otilde;' => '&#213;',
        '&Ouml;' => '&#214;',
        '&times;' => '&#215;',
        '&Oslash;' => '&#216;',
        '&Ugrave;' => '&#217;',
        '&Uacute;' => '&#218;',
        '&Ucirc;' => '&#219;',
        '&Uuml;' => '&#220;',
        '&Yacute;' => '&#221;',
        '&THORN;' => '&#222;',
        '&szlig;' => '&#223;',
        '&agrave;' => '&#224;',
        '&aacute;' => '&#225;',
        '&acirc;' => '&#226;',
        '&atilde;' => '&#227;',
        '&auml;' => '&#228;',
        '&aring;' => '&#229;',
        '&aelig;' => '&#230;',
        '&ccedil;' => '&#231;',
        '&egrave;' => '&#232;',
        '&eacute;' => '&#233;',
        '&ecirc;' => '&#234;',
        '&euml;' => '&#235;',
        '&igrave;' => '&#236;',
        '&iacute;' => '&#237;',
        '&icirc;' => '&#238;',
        '&iuml;' => '&#239;',
        '&eth;' => '&#240;',
        '&ntilde;' => '&#241;',
        '&ograve;' => '&#242;',
        '&oacute;' => '&#243;',
        '&ocirc;' => '&#244;',
        '&otilde;' => '&#245;',
        '&ouml;' => '&#246;',
        '&divide;' => '&#247;',
        '&oslash;' => '&#248;',
        '&ugrave;' => '&#249;',
        '&uacute;' => '&#250;',
        '&ucirc;' => '&#251;',
        '&uuml;' => '&#252;',
        '&yacute;' => '&#253;',
        '&thorn;' => '&#254;',
        '&yuml;' => '&#255;',
        // Latin Extended-A
        '&OElig;' => '&#338;',
        '&oelig;' => '&#339;',
        '&Scaron;' => '&#352;',
        '&scaron;' => '&#353;',
        '&Yuml;' => '&#376;',
        // Spacing Modifier Letters
        '&circ;' => '&#710;',
        '&tilde;' => '&#732;',
        // General Punctuation
        '&ensp;' => '&#8194;',
        '&emsp;' => '&#8195;',
        '&thinsp;' => '&#8201;',
        '&zwnj;' => '&#8204;',
        '&zwj;' => '&#8205;',
        '&lrm;' => '&#8206;',
        '&rlm;' => '&#8207;',
        '&ndash;' => '&#8211;',
        '&mdash;' => '&#8212;',
        '&lsquo;' => '&#8216;',
        '&rsquo;' => '&#8217;',
        '&sbquo;' => '&#8218;',
        '&ldquo;' => '&#8220;',
        '&rdquo;' => '&#8221;',
        '&bdquo;' => '&#8222;',
        '&dagger;' => '&#8224;',
        '&Dagger;' => '&#8225;',
        '&permil;' => '&#8240;',
        '&lsaquo;' => '&#8249;',
        '&rsaquo;' => '&#8250;',
        '&euro;' => '&#8364;',
        // Latin Extended-B
        '&fnof;' => '&#402;',
        // Greek
        '&Alpha;' => '&#913;',
        '&Beta;' => '&#914;',
        '&Gamma;' => '&#915;',
        '&Delta;' => '&#916;',
        '&Epsilon;' => '&#917;',
        '&Zeta;' => '&#918;',
        '&Eta;' => '&#919;',
        '&Theta;' => '&#920;',
        '&Iota;' => '&#921;',
        '&Kappa;' => '&#922;',
        '&Lambda;' => '&#923;',
        '&Mu;' => '&#924;',
        '&Nu;' => '&#925;',
        '&Xi;' => '&#926;',
        '&Omicron;' => '&#927;',
        '&Pi;' => '&#928;',
        '&Rho;' => '&#929;',
        '&Sigma;' => '&#931;',
        '&Tau;' => '&#932;',
        '&Upsilon;' => '&#933;',
        '&Phi;' => '&#934;',
        '&Chi;' => '&#935;',
        '&Psi;' => '&#936;',
        '&Omega;' => '&#937;',
        '&alpha;' => '&#945;',
        '&beta;' => '&#946;',
        '&gamma;' => '&#947;',
        '&delta;' => '&#948;',
        '&epsilon;' => '&#949;',
        '&zeta;' => '&#950;',
        '&eta;' => '&#951;',
        '&theta;' => '&#952;',
        '&iota;' => '&#953;',
        '&kappa;' => '&#954;',
        '&lambda;' => '&#955;',
        '&mu;' => '&#956;',
        '&nu;' => '&#957;',
        '&xi;' => '&#958;',
        '&omicron;' => '&#959;',
        '&pi;' => '&#960;',
        '&rho;' => '&#961;',
        '&sigmaf;' => '&#962;',
        '&sigma;' => '&#963;',
        '&tau;' => '&#964;',
        '&upsilon;' => '&#965;',
        '&phi;' => '&#966;',
        '&chi;' => '&#967;',
        '&psi;' => '&#968;',
        '&omega;' => '&#969;',
        '&thetasym;' => '&#977;',
        '&upsih;' => '&#978;',
        '&piv;' => '&#982;',
        // General Punctuation
        '&bull;' => '&#8226;',
        '&hellip;' => '&#8230;',
        '&prime;' => '&#8242;',
        '&Prime;' => '&#8243;',
        '&oline;' => '&#8254;',
        '&frasl;' => '&#8260;',
        // Letterlike Symbols
        '&weierp;' => '&#8472;',
        '&image;' => '&#8465;',
        '&real;' => '&#8476;',
        '&trade;' => '&#8482;',
        '&alefsym;' => '&#8501;',
        // Arrows
        '&larr;' => '&#8592;',
        '&uarr;' => '&#8593;',
        '&rarr;' => '&#8594;',
        '&darr;' => '&#8595;',
        '&harr;' => '&#8596;',
        '&crarr;' => '&#8629;',
        '&lArr;' => '&#8656;',
        '&uArr;' => '&#8657;',
        '&rArr;' => '&#8658;',
        '&dArr;' => '&#8659;',
        '&hArr;' => '&#8660;',
        // Mathematical Operators
        '&forall;' => '&#8704;',
        '&part;' => '&#8706;',
        '&exist;' => '&#8707;',
        '&empty;' => '&#8709;',
        '&nabla;' => '&#8711;',
        '&isin;' => '&#8712;',
        '&notin;' => '&#8713;',
        '&ni;' => '&#8715;',
        '&prod;' => '&#8719;',
        '&sum;' => '&#8721;',
        '&minus;' => '&#8722;',
        '&lowast;' => '&#8727;',
        '&radic;' => '&#8730;',
        '&prop;' => '&#8733;',
        '&infin;' => '&#8734;',
        '&ang;' => '&#8736;',
        '&and;' => '&#8743;',
        '&or;' => '&#8744;',
        '&cap;' => '&#8745;',
        '&cup;' => '&#8746;',
        '&int;' => '&#8747;',
        '&there4;' => '&#8756;',
        '&sim;' => '&#8764;',
        '&cong;' => '&#8773;',
        '&asymp;' => '&#8776;',
        '&ne;' => '&#8800;',
        '&equiv;' => '&#8801;',
        '&le;' => '&#8804;',
        '&ge;' => '&#8805;',
        '&sub;' => '&#8834;',
        '&sup;' => '&#8835;',
        '&nsub;' => '&#8836;',
        '&sube;' => '&#8838;',
        '&supe;' => '&#8839;',
        '&oplus;' => '&#8853;',
        '&otimes;' => '&#8855;',
        '&perp;' => '&#8869;',
        '&sdot;' => '&#8901;',
        // Miscellaneous Technical
        '&lceil;' => '&#8968;',
        '&rceil;' => '&#8969;',
        '&lfloor;' => '&#8970;',
        '&rfloor;' => '&#8971;',
        '&lang;' => '&#9001;',
        '&rang;' => '&#9002;',
        // Geometric Shapes
        '&loz;' => '&#9674;',
        // Miscellaneous Symbols
        '&spades;' => '&#9824;',
        '&clubs;' => '&#9827;',
        '&hearts;' => '&#9829;',
        '&diams;' => '&#9830;');

    // Replacement values never contain a named entity, so the order in
    // which str_replace() walks the table cannot affect the result.
    $str = str_replace(array_keys($entities), array_values($entities), $str);

    return $str;
}
 
 
 /* ------------------------------ main --------------------------- */
 /* ------------------------------ main --------------------------- */