utf_8.php 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. <?php
  2. /**
  3. * utf-8 encoding function
  4. *
  5. * takes a string of unicode entities and converts it to a utf-8 encoded string
  6. * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by utf-8 supporting
  7. * browsers. Ascii will not be modified.
  8. *
  9. * code is taken from www.php.net manual comments
  10. * Author: ronen at greyzone dot com
  11. *
  12. * @version $Id$
  13. * @package squirrelmail
  14. * @subpackage encode
  15. */
  /**
   * Converts decimal numeric character references to UTF-8.
   *
   * Every well-formed reference of the form &#nnn; (decimal digits only)
   * whose value is a Unicode scalar value (0..0x10FFFF, surrogates excluded)
   * is replaced by its UTF-8 byte sequence. All other text is copied through
   * byte-for-byte: plain text containing ';', hexadecimal references such as
   * &#x41;, malformed references, and references whose value cannot be
   * encoded in UTF-8.
   *
   * @param string $source string that may contain &#nnn; references
   * @return string the string with encodable references replaced by UTF-8 bytes
   */
  function charset_encode_utf_8($source) {
      // With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly:
      // even indexes hold literal text (possibly empty), odd indexes hold
      // the captured digits of one reference.
      $pieces = preg_split('/&#([0-9]+);/', $source, -1, PREG_SPLIT_DELIM_CAPTURE);
      if ($pieces === false) {
          // PCRE failed; return the input untouched rather than lose data.
          return $source;
      }

      $utf8Str = '';
      $pieceCount = count($pieces);
      for ($i = 0; $i < $pieceCount; $i++) {
          if ($i % 2 === 0) {
              // literal text between references is never modified
              $utf8Str .= $pieces[$i];
              continue;
          }

          $digits = $pieces[$i];
          // Over-long digit runs yield a huge value that fails the range
          // check below, so they are passed through as text.
          $unicode = intval($digits);

          if ($unicode < 0x80) {
              // 1 byte: 0xxxxxxx
              $utf8Str .= chr($unicode);
          } elseif ($unicode < 0x800) {
              // 2 bytes: 110xxxxx 10xxxxxx
              $utf8Str .= chr(0xC0 | ($unicode >> 6))
                        . chr(0x80 | ($unicode & 0x3F));
          } elseif ($unicode >= 0xD800 && $unicode <= 0xDFFF) {
              // UTF-16 surrogates are not valid in UTF-8; keep the reference
              $utf8Str .= '&#' . $digits . ';';
          } elseif ($unicode < 0x10000) {
              // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
              $utf8Str .= chr(0xE0 | ($unicode >> 12))
                        . chr(0x80 | (($unicode >> 6) & 0x3F))
                        . chr(0x80 | ($unicode & 0x3F));
          } elseif ($unicode <= 0x10FFFF) {
              // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
              $utf8Str .= chr(0xF0 | ($unicode >> 18))
                        . chr(0x80 | (($unicode >> 12) & 0x3F))
                        . chr(0x80 | (($unicode >> 6) & 0x3F))
                        . chr(0x80 | ($unicode & 0x3F));
          } else {
              // beyond the Unicode range: keep the reference as written
              $utf8Str .= '&#' . $digits . ';';
          }
      }

      return $utf8Str;
  }
  76. ?>