utf_8.php 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. <?php
  2. /**
  3. * decode/utf-8.php
  4. *
  5. * Copyright (c) 2003-2004 The SquirrelMail Project Team
  6. * Licensed under the GNU GPL. For full terms see the file COPYING.
  7. *
  8. * This file contains the utf-8 decoding function that is needed to read
  9. * utf-8 encoded mails in a non-utf-8 locale.
  10. *
  11. * Every encoded character consists of n bytes. The first byte is in the
  12. * octal range 300-375; all other bytes are always in the octal range 200-277.
  13. *
  14. * \a\b characters are decoded to html code octdec(a-300)*64 + octdec(b-200)
  15. * \a\b\c characters are decoded to html code octdec(a-340)*64*64 + octdec(b-200)*64 + octdec(c-200)
  16. *
  17. * decoding cycle is unfinished. please test and report problems to tokul@users.sourceforge.net
  18. *
  19. * @version $Id$
  20. * @package squirrelmail
  21. * @subpackage decode
  22. */
  /**
   * Decode utf-8 strings
   *
   * Replaces every two-, three- and four-byte utf-8 sequence found in
   * $string with the matching decimal html entity (&#NNN;). Strings without
   * 8-bit bytes are returned unchanged, and so is every string while the
   * global $squirrelmail_language is 'ja_JP'.
   *
   * Sequences are matched by byte ranges only: a lead byte followed by the
   * right number of continuation bytes (0x80-0xBF). Overlong forms are not
   * rejected, and bytes that do not form a complete sequence are left as
   * they are.
   *
   * @param string $string Encoded string
   * @return string Decoded string
   */
  function charset_decode_utf_8 ($string) {
      global $squirrelmail_language;

      if ($squirrelmail_language === 'ja_JP') {
          return $string;
      }

      /* Only do the slow convert if there are 8-bit characters */
      if (! preg_match('/[\x80-\xFF]/', $string)) {
          return $string;
      }

      /*
       * ereg() and the preg_replace() /e modifier no longer exist in PHP 7+,
       * so all sequence lengths are decoded in one callback pass. Lead and
       * continuation byte ranges do not overlap, so one pass finds the same
       * matches as separate per-length passes would.
       */
      $decoded = preg_replace_callback(
          '/[\xF0-\xF7][\x80-\xBF]{3}|[\xE0-\xEF][\x80-\xBF]{2}|[\xC0-\xDF][\x80-\xBF]/',
          function ($match) {
              $sequence = $match[0];
              $length = strlen($sequence);

              // marker value to strip from the lead byte, keyed by sequence length
              $lead_markers = array(2 => 192, 3 => 224, 4 => 240);

              $code = ord($sequence[0]) - $lead_markers[$length];
              // every continuation byte contributes its low six bits
              for ($i = 1; $i < $length; $i++) {
                  $code = $code * 64 + (ord($sequence[$i]) - 128);
              }

              return '&#' . $code . ';';
          },
          $string
      );

      // preg_replace_callback() returns NULL on a PCRE error; keep the input then
      return ($decoded === null) ? $string : $decoded;
  }
  46. ?>