php_pspell.php 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. <?php
  2. /**
  3. * PHP pspell spellcheck class functions
  4. * @copyright &copy; 2006 The SquirrelMail Project Team
  5. * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  6. * @version $Id$
  7. * @package plugins
  8. * @subpackage squirrelspell
  9. */
  10. /**
  11. * PHP Pspell class
  12. * @package plugins
  13. * @subpackage squirrelspell
  14. */
  class php_pspell extends squirrelspell {
      /**
       * Dictionary language, first argument of pspell_new()
       * @var string
       */
      var $dict = 'en';
      /**
       * Dictionary spelling variant, second argument of pspell_new()
       * @var string
       */
      var $subdict = '';
      /**
       * Dictionary jargon, third argument of pspell_new()
       * @var string
       */
      var $jargon = '';
      /**
       * Character set of checked text. Passed to pspell_new() and
       * to the sq_strlen()/sq_substr() calls in check_text().
       * @var string
       */
      var $charset = 'utf-8';
      /**
       * Pspell mode, fifth argument of pspell_new().
       * Set to PSPELL_FAST by the constructor when no mode is given.
       * @var integer
       */
      var $mode = null;
      /**
       * Value of the 'userdic' constructor parameter.
       * Stored only; nothing in this class reads it.
       * @var array
       */
      var $userdic = array();
      /**
       * Result of the last check_text() call. Keys are misspelled words,
       * values are arrays with 'suggestions' and 'locations' entries.
       * @var array
       */
      var $missed_words = array();
      /**
       * Error buffer
       * @var string
       */
      var $error = '';
      /**
       * Dictionary link returned by pspell_new().
       * Stays null when the pspell extension is not loaded and
       * is false when the dictionary could not be opened.
       * @var mixed
       */
      var $dictionary_link = null;

      /**
       * Constructor function
       *
       * Recognized keys of $aParams:
       *  - 'dictionary': comma separated "language,spelling,jargon" string
       *  - 'charset':    character set of checked text
       *  - 'userdic':    stored in $this->userdic
       *  - 'mode':       pspell mode, defaults to PSPELL_FAST
       *
       * Failures are reported through set_error(); the object is still
       * created, but $this->dictionary_link is not usable.
       * @param array $aParams
       */
      function __construct($aParams=array()) {
          if (! extension_loaded('pspell')) {
              $this->set_error('Pspell extension is not available');
              return;
          }

          if (isset($aParams['dictionary'])) {
              $aDict = explode(',',$aParams['dictionary']);
              if (isset($aDict[0])) $this->dict = trim($aDict[0]);
              if (isset($aDict[1])) $this->subdict = trim($aDict[1]);
              if (isset($aDict[2])) $this->jargon = trim($aDict[2]);
          }
          if (isset($aParams['charset'])) {
              $this->charset = $aParams['charset'];
          }
          if (isset($aParams['userdic'])) {
              $this->userdic = $aParams['userdic'];
          }
          if (isset($aParams['mode'])) {
              $this->mode = $aParams['mode'];
          } else {
              $this->mode = PSPELL_FAST;
          }

          // dict, subdict, jargon, charset, spellcheck_type
          $this->dictionary_link = pspell_new($this->dict,$this->subdict,$this->jargon,$this->charset,$this->mode);
          if ($this->dictionary_link === false) {
              // pspell_new() returns false when dictionary can't be opened
              $this->set_error('Could not open pspell dictionary');
          }
      }

      /**
       * Constructor (PHP4 style)
       *
       * Kept for compatibility with PHP versions and callers that use
       * the class name as constructor. PHP 8 does not treat this method
       * as a constructor, so the real work is done in __construct().
       * @param array $aParams
       */
      function php_pspell($aParams=array()) {
          $this->__construct($aParams);
      }

      // private functions

      /**
       * Checks spelling of a single word
       * @param string $sWord
       * @return boolean result of pspell_check()
       */
      function check_word($sWord) {
          return pspell_check($this->dictionary_link,$sWord);
      }

      /**
       * Gets spelling suggestions for a single word
       * @param string $sWord
       * @return array result of pspell_suggest()
       */
      function suggest($sWord) {
          return pspell_suggest($this->dictionary_link,$sWord);
      }

      // public function

      /**
       * Check block of text
       *
       * Text is split into words on space, dot, semicolon, tab, CR and LF.
       * Every word that fails check_word() is stored in
       * $this->missed_words[$word] with two entries:
       *  - 'suggestions': result of suggest(), fetched once per word
       *  - 'locations':   list of "line:start" strings, one per occurrence.
       *    Line counter starts at 0 and is increased on every LF,
       *    start is character offset of the word in that line.
       *
       * NOTE(review): other punctuation (comma, colon, '!', '?', quotes,
       * brackets) is not treated as separator and becomes part of the
       * checked word. Confirm whether callers strip it beforehand.
       * @param string $sText
       * @return array
       */
      function check_text($sText) {
          // resets missed words array
          $this->missed_words = array();
          $line = 0;
          $start = 0;
          $position = 0;
          $word = '';

          // text is not modified in the loop, length is calculated only once
          $length = sq_strlen($sText,$this->charset);

          // parse text. sq_* functions are used in order to work with characters and not with bytes
          for ($i = 0; $i <= $length; $i++) {
              if ($i == $length) {
                  // add space in order to check last $word.
                  $char = ' ';
              } else {
                  $char = sq_substr($sText,$i,1,$this->charset);
              }

              switch($char) {
              case ' ':
              case '.':
              case ';':
              case "\t":
              case "\r":
              case "\n":
                  // end of word. Strict comparison is used instead of empty(),
                  // because empty('0') is true and word "0" would be glued
                  // to the next word.
                  if ($word !== '') {
                      // word that is already known as misspelled is not rechecked
                      if (isset($this->missed_words[$word]) || !$this->check_word($word)) {
                          if (! isset($this->missed_words[$word]['suggestions'])) {
                              $this->missed_words[$word]['suggestions'] = $this->suggest($word);
                          }
                          $this->missed_words[$word]['locations'][] = "$line:$start";
                      }
                      $word = '';
                  }
                  if ($char == "\n") {
                      // new line, character offset starts from zero
                      $position = 0;
                      $line++;
                  } else {
                      $position++;
                  }
                  break;
              default:
                  // a-zA-Z0-9' + 8bit chars (nbspace and other spaces excluded, depends on charset)
                  // add char to word
                  if ($word === '') {
                      $start = $position; // squirrelspell adds one space to checked text
                  }
                  $position++;
                  $word.=$char;
              }
          }
          return $this->missed_words;
      }
  }