utf-8.php 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. <?php
  2. /*
  3. * decode/utf-8.php
  4. * $Id$
  5. *
  6. * Copyright (c) 2003 The SquirrelMail Project Team
  7. * Licensed under the GNU GPL. For full terms see the file COPYING.
  8. *
  9. * This file contains the UTF-8 decoding function that is needed to read
  10. * UTF-8 encoded mails in a non-UTF-8 locale.
  11. *
  12. * Every encoded character consists of n bytes. The first byte is in the
  13. * octal range 300-375; every following byte is in the octal range 200-277.
  14. *
  15. * \a\b characters are decoded to html code octdec(a-300)*64 + octdec(b-200)
  16. * \a\b\c characters are decoded to html code octdec(a-340)*64*64 + octdec(b-200)*64 + octdec(c-200)
  17. *
  18. * The decoding implementation is unfinished. Please test and report problems to tokul@users.sourceforge.net
  19. *
  20. */
  /**
   * Replace two- and three-byte UTF-8 sequences in a string with decimal
   * HTML numeric character references ("&#NNN;").
   *
   * The string is returned unchanged when the global $default_charset is
   * already utf-8 (compared case-insensitively) or when the string contains
   * no 8-bit bytes at all.
   *
   * Only the byte ranges are checked: a lead byte 0xE0-0xEF followed by two
   * continuation bytes (0x80-0xBF), or a lead byte 0xC0-0xDF followed by one
   * continuation byte. Sequences are not validated any further, and bytes
   * that do not form such a sequence (for example four-byte sequences) are
   * left untouched.
   *
   * @param string $string text that may contain UTF-8 encoded characters
   * @return string text with the matched sequences replaced by "&#NNN;"
   */
  function charset_decode_utf8 ($string) {
      global $default_charset;

      // Nothing to do when the output charset is UTF-8 itself.
      if (strtolower((string) $default_charset) === 'utf-8') {
          return $string;
      }

      // Only do the slow conversion if there are 8-bit characters.
      // preg_match() replaces ereg(), which was removed in PHP 7.
      if (! preg_match('/[\x80-\xFF]/', $string)) {
          return $string;
      }

      // Decode three-byte characters first so that their continuation bytes
      // are consumed before the two-byte pass runs.
      // preg_replace_callback() replaces the /e modifier (removed in PHP 7).
      $decoded = preg_replace_callback(
          '/([\xE0-\xEF])([\x80-\xBF])([\x80-\xBF])/',
          function ($m) {
              $code = (ord($m[1]) - 224) * 4096
                    + (ord($m[2]) - 128) * 64
                    + (ord($m[3]) - 128);
              return '&#' . $code . ';';
          },
          $string
      );
      // preg_replace_callback() returns null on a PCRE error; keep the input then.
      if ($decoded !== null) {
          $string = $decoded;
      }

      // Decode two-byte characters.
      $decoded = preg_replace_callback(
          '/([\xC0-\xDF])([\x80-\xBF])/',
          function ($m) {
              $code = (ord($m[1]) - 192) * 64 + (ord($m[2]) - 128);
              return '&#' . $code . ';';
          },
          $string
      );
      if ($decoded !== null) {
          $string = $decoded;
      }

      return $string;
  }
  39. ?>