utf_8.php 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. <?php
  2. /**
  3. * utf-8 encoding function
  4. *
  5. * takes a string of unicode entities and converts it to a utf-8 encoded string
  6. * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by UTF-8 supporting
  7. * browsers. ASCII will not be modified.
  8. *
  9. * code is taken from www.php.net manual comments
  10. * Author: ronen at greyzone dot com
  11. *
  12. * @version $Id$
  13. * @package squirrelmail
  14. * @subpackage encode
  15. */
  /**
   * Converts decimal numeric character references in a string to UTF-8.
   *
   * Every reference of the exact form "&#" + one or more decimal digits + ";"
   * is replaced by the UTF-8 byte sequence of that code point. All other text
   * is copied through unchanged, including a "&#" that does not start such a
   * reference (for example "&#x41;" or a lone "&#") and text containing ";"
   * that is not part of a reference.
   *
   * References that do not name a Unicode scalar value (the surrogate range
   * U+D800..U+DFFF, or anything above U+10FFFF) cannot be represented in
   * well-formed UTF-8 and are therefore left in the output as written.
   *
   * @param string $source string that may contain &#nnn; entities
   * @return string the same text with valid entities replaced by UTF-8 bytes
   */
  function charset_encode_utf_8 ($source) {
      // don't run through the encoding loop if there are no encoded characters
      if (strpos ($source, '&#') === false) return $source;

      $segments = explode ('&#', $source);
      // The first segment is whatever precedes the first "&#"; it can never be
      // an entity body, so it is always copied verbatim.
      $utf8Str = $segments[0];
      $size = count ($segments);

      for ($i = 1; $i < $size; $i++) {
          $segment = $segments[$i];

          // Only "digits;" directly after "&#" is a numeric entity. Anything
          // else gets the "&#" that explode() removed put back, untouched.
          if (! preg_match ('/^([0-9]+);/', $segment, $match)) {
              $utf8Str .= '&#' . $segment;
              continue;
          }

          // Overlong digit strings saturate at the maximum integer and are
          // rejected by the range check below.
          $unicode = intval ($match[1]);

          // Surrogates and values beyond U+10FFFF have no UTF-8 encoding.
          if ($unicode > 0x10FFFF || ($unicode >= 0xD800 && $unicode <= 0xDFFF)) {
              $utf8Str .= '&#' . $segment;
              continue;
          }

          // determine how many bytes are needed to represent this code point
          if ($unicode < 0x80) {
              // 0xxxxxxx
              $utf8Char = chr ($unicode);
          }
          else if ($unicode < 0x800) {
              // 110xxxxx 10xxxxxx
              $utf8Char = chr (0xC0 | ($unicode >> 6))
                        . chr (0x80 | ($unicode & 0x3F));
          }
          else if ($unicode < 0x10000) {
              // 1110xxxx 10xxxxxx 10xxxxxx
              $utf8Char = chr (0xE0 | ($unicode >> 12))
                        . chr (0x80 | (($unicode >> 6) & 0x3F))
                        . chr (0x80 | ($unicode & 0x3F));
          }
          else {
              // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
              $utf8Char = chr (0xF0 | ($unicode >> 18))
                        . chr (0x80 | (($unicode >> 12) & 0x3F))
                        . chr (0x80 | (($unicode >> 6) & 0x3F))
                        . chr (0x80 | ($unicode & 0x3F));
          }

          // Append the encoded character plus the plain text that followed
          // the terminating ';' inside this segment.
          $utf8Str .= $utf8Char . substr ($segment, strlen ($match[0]));
      }

      return $utf8Str;
  }
  78. ?>