utf_8.php 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. <?php
  2. /**
  3. * decode/utf-8.php
  4. *
  5. * Copyright (c) 2003-2004 The SquirrelMail Project Team
  6. * Licensed under the GNU GPL. For full terms see the file COPYING.
  7. *
  8. * This file contains the utf-8 decoding function that is needed to read
  9. * utf-8 encoded mails in a non-utf-8 locale.
  10. *
  11. * Every decoded character consists of n bytes. First byte is octal
  12. * 300-375, other bytes - always octals 200-277.
  13. *
  14. * \a\b characters are decoded to html code octdec(a-300)*64 + octdec(b-200)
  15. * \a\b\c characters are decoded to html code octdec(a-340)*64*64 + octdec(b-200)*64 + octdec(c-200)
  16. *
  17. * decoding cycle is unfinished. please test and report problems to tokul@users.sourceforge.net
  18. *
  19. * @version $Id$
  20. * @package squirrelmail
  21. * @subpackage decode
  22. */
  23. /**
  24. * Decode utf-8 strings
  25. * @param string $string Encoded string
  26. * @return string Decoded string
  27. */
  28. function charset_decode_utf_8 ($string) {
  29. global $default_charset,$squirrelmail_language;
  30. if ($squirrelmail_language == 'ja_JP')
  31. return $string;
  32. // don't do decoding when there are no 8bit symbols
  33. if (! sq_is8bit($string,'utf-8'))
  34. return $string;
  35. // decode three byte unicode characters
  36. $string = preg_replace("/([\340-\357])([\200-\277])([\200-\277])/e",
  37. "'&#'.((ord('\\1')-224)*4096+(ord('\\2')-128)*64+(ord('\\3')-128)).';'",
  38. $string);
  39. // decode two byte unicode characters
  40. $string = preg_replace("/([\300-\337])([\200-\277])/e",
  41. "'&#'.((ord('\\1')-192)*64+(ord('\\2')-128)).';'",
  42. $string);
  43. return $string;
  44. }
  45. ?>