utf_8.php 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. <?php
  2. /**
  3. * utf-8 encoding functions
  4. *
  5. * takes a string of unicode entities and converts it to a utf-8 encoded string
  6. * each unicode entity has the form &#nnn(nn); n={0..9} and can be displayed by utf-8 supporting
  7. * browsers. Ascii will not be modified.
  8. *
  9. * Original code is taken from www.php.net manual comments
  10. * Original author: ronen at greyzone dot com
  11. *
  12. * @version $Id$
  13. * @copyright Copyright &copy; 2004-2005 The SquirrelMail Project Team
  14. * @package squirrelmail
  15. * @subpackage encode
  16. */
  /**
   * Converts numeric unicode entities in a string to utf-8
   *
   * Every decimal entity of the form &#nnn; is replaced with the byte
   * sequence returned by unicodetoutf8() for that number. Text that
   * contains no such entity is returned untouched. The pattern only
   * matches decimal digits, so hexadecimal entities (&#xNN;) are not
   * converted.
   *
   * Requires PHP 5.3 or newer (anonymous function used as callback).
   *
   * @param string $string text with numeric unicode entities
   * @return string utf-8 encoded text
   */
  function charset_encode_utf_8 ($string) {
      // don't run the replacement at all if there are no decimal entities
      if (! preg_match("'&#[0-9]+;'", $string)) {
          return $string;
      }

      // The /e (eval) pattern modifier was deprecated in PHP 5.5 and removed
      // in PHP 7.0, where preg_replace() warns and returns NULL instead of
      // the converted text. A callback does the same job without eval.
      $converted = preg_replace_callback(
          "/&#([0-9]+);/",
          function ($match) {
              // $match[1] is the captured run of decimal digits
              return unicodetoutf8($match[1]);
          },
          $string
      );

      // preg_replace_callback() returns NULL when PCRE fails; hand back the
      // unmodified input rather than losing the whole text.
      return ($converted === null) ? $string : $converted;
  }
  /**
   * Return utf8 symbol when unicode character number is provided
   *
   * This function is used internally by charset_encode_utf_8
   * function. It might be unavailable to other SquirrelMail functions.
   * Don't use it unless you make sure that functions/encode/utf_8.php
   * is included.
   *
   * Only Unicode scalar values are encoded: 0 .. 0x10FFFF, excluding the
   * surrogate range 0xD800 .. 0xDFFF. Anything else (negative numbers,
   * surrogates, values above 0x10FFFF, non-numeric input) has no valid
   * utf-8 representation under RFC 3629 and yields '?'.
   *
   * @param int|string $var decimal unicode value (integer or digit string)
   * @return string utf8 character, or '?' when the value can't be encoded
   */
  function unicodetoutf8($var) {
      // The entity converter passes the digits as a string, so accept
      // anything numeric and reject the rest.
      if (! is_numeric($var)) {
          return '?';
      }

      // Range-check before the integer cast, so that an over-long digit
      // string is rejected here instead of being clamped by the cast.
      if ($var < 0 || $var > 0x10FFFF) {
          // there is no such symbol in utf-8
          return '?';
      }

      $code = (int) $var;

      // UTF-16 surrogates are not characters; encoding them would produce
      // an invalid utf-8 sequence.
      if ($code >= 0xD800 && $code <= 0xDFFF) {
          return '?';
      }

      if ($code < 0x80) {
          // One byte (ascii): 0xxxxxxx
          return chr($code);
      }

      if ($code < 0x800) {
          // Two byte utf-8: 110xxxxx 10xxxxxx
          return chr(0xC0 | ($code >> 6))
               . chr(0x80 | ($code & 0x3F));
      }

      if ($code < 0x10000) {
          // Three byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
          return chr(0xE0 | ($code >> 12))
               . chr(0x80 | (($code >> 6) & 0x3F))
               . chr(0x80 | ($code & 0x3F));
      }

      // Four byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      return chr(0xF0 | ($code >> 18))
           . chr(0x80 | (($code >> 12) & 0x3F))
           . chr(0x80 | (($code >> 6) & 0x3F))
           . chr(0x80 | ($code & 0x3F));
  }
  109. ?>