mime.php 57 KB


  1. <?php
  2. /**
  3. * mime.php
  4. *
  5. * Copyright (c) 1999-2003 The SquirrelMail Project Team
  6. * Licensed under the GNU GPL. For full terms see the file COPYING.
  7. *
  8. * This contains the functions necessary to detect and decode MIME
  9. * messages.
  10. *
  11. * $Id$
  12. */
  13. require_once(SM_PATH . 'functions/imap.php');
  14. require_once(SM_PATH . 'functions/attachment_common.php');
  15. /* --------------------------------------------------------------------------------- */
  16. /* MIME DECODING */
  17. /* --------------------------------------------------------------------------------- */
  /**
   * Parses a raw IMAP BODYSTRUCTURE response into a "message" object and
   * marks the IMAP flags that were supplied on that object.
   *
   * If Message::parseStructure() does not return an object, an error page
   * showing the offending structure is printed and the script exits.
   *
   * @param string $bodystructure response text containing "BODYSTRUCTURE ..."
   * @param array  $flags         IMAP flag strings (e.g. "\Seen", "$MDNSent")
   * @return object the object returned by Message::parseStructure()
   */
  function mime_structure ($bodystructure, $flags=array()) {
      /* Isolate the body structure and remove beginning and end parenthesis. */
      $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13));
      $read = trim(substr ($read, 0, -1));
      $i = 0;
      $msg = Message::parseStructure($read,$i);
      if (!is_object($msg)) {
          include_once(SM_PATH . 'functions/display_messages.php');
          global $color, $mailbox;
          displayPageHeader( $color, urldecode($mailbox) );
          echo "<BODY TEXT=\"$color[8]\" BGCOLOR=\"$color[4]\" LINK=\"$color[7]\" VLINK=\"$color[7]\" ALINK=\"$color[7]\">\n\n" .
               '<CENTER>';
          $errormessage  = _("SquirrelMail could not decode the bodystructure of the message");
          $errormessage .= '<BR>'._("the provided bodystructure by your imap-server").':<BR><BR>';
          $errormessage .= '<table><tr><td>' . htmlspecialchars($read) . '</td></tr></table>';
          plain_error_message( $errormessage, $color );
          echo '</body></html>';
          exit;
      }

      /* Lower-cased flag name => property set to true on the message.
       * A lookup table replaces the old first-letter switch, which used the
       * removed $flag{1} syntax and read past the end of one-character flags.
       */
      $flag_properties = array(
          '\\seen'     => 'is_seen',
          '\\answered' => 'is_answered',
          '\\deleted'  => 'is_deleted',
          '\\flagged'  => 'is_flagged',
          '$mdnsent'   => 'is_mdnsent'
      );
      if (is_array($flags)) {
          foreach ($flags as $flag) {
              $normalized = strtolower($flag);
              if (isset($flag_properties[$normalized])) {
                  $property = $flag_properties[$normalized];
                  $msg->$property = true;
              }
          }
      }
      return $msg;
  }
  /**
   * Fetches the raw (still transfer-encoded) body of one entity of a message
   * through sqimap_run_command().
   *
   * The first FETCH response line decides what is returned:
   *  - if it carries a literal size "{N}", the first N bytes of the remaining
   *    response lines;
   *  - otherwise, if it carries a quoted string, that string;
   *  - otherwise a diagnostic table is printed and the whole message is
   *    fetched again and returned.
   *
   * @param resource $imap_stream IMAP connection passed to sqimap_run_command()
   * @param mixed    $id          message id used in the FETCH command
   * @param mixed    $ent_id      entity id; a false value fetches BODY[]
   * @return string the fetched body text
   */
  function mime_fetch_body($imap_stream, $id, $ent_id=1) {
      global $uid_support;
      /* Do a bit of error correction. If we couldn't find the entity id,
       * fetch the complete body instead.
       */
      if (!$ent_id) {
          $cmd = "FETCH $id BODY[]";
      } else {
          $cmd = "FETCH $id BODY[$ent_id]";
      }
      $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, $uid_support);

      /* Skip untagged lines until the "* n FETCH" line is reached. */
      do {
          $topline = trim(array_shift($data));
      } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ;

      $wholemessage = implode('', $data);
      if (preg_match('/\{([^}]*)\}/', $topline, $regs)) {
          /* Literal: the number between the braces is the body length. */
          $ret = substr($wholemessage, 0, (int) $regs[1]);
          /* There is some information in the content info header that could
           * be important in order to parse html messages. Let's get them here.
           * NOTE(review): the result of this second fetch is never used.
           */
          if (strncmp($ret, '<', 1) == 0) {
              $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, $uid_support);
          }
      } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
          $ret = $regs[1];
      } else {
          global $passed_id;
          /* Everything shown here originates from the IMAP server or the
           * request, so it is escaped before being written into the page.
           */
          echo '<tt><br>' .
               '<table width="80%"><tr>' .
               '<tr><td colspan=2>' .
               _("Body retrieval error. The reason for this is most probably that the message is malformed.") .
               '</td></tr>' .
               '<tr><td><b>' . _("Command:") . '</td><td>' . htmlspecialchars($cmd) . '</td></tr>' .
               '<tr><td><b>' . _("Response:") . '</td><td>' . htmlspecialchars($response) . '</td></tr>' .
               '<tr><td><b>' . _("Message:") . '</td><td>' . htmlspecialchars($message) . '</td></tr>' .
               '<tr><td><b>' . _("FETCH line:") . '</td><td>' . htmlspecialchars($topline) . '</td></tr>' .
               "</table><BR></tt></font><hr>";
          /* NOTE(review): this refetch uses the global $passed_id rather than
           * the $id argument -- confirm that this is intended.
           */
          $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, $uid_support);
          array_shift($data);
          $ret = implode('', $data);
      }
      return $ret;
  }
  /**
   * Fetches one entity of a message, decodes it and echoes the result.
   *
   * $encoding now carries a default so that no required parameter follows
   * the optional $ent_id; existing four-argument calls are unaffected.
   *
   * @param resource $imap_stream IMAP connection passed to mime_fetch_body()
   * @param mixed    $id          message id
   * @param mixed    $ent_id      entity id inside the message
   * @param string   $encoding    transfer encoding handed to decodeBody()
   * @return void
   */
  function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding='') {
      /* Don't kill the connection if the browser is over a dialup
       * and it would take over 30 seconds to download it.
       * Don't call set_time_limit in safe mode.
       */
      if (!ini_get('safe_mode')) {
          set_time_limit(0);
      }
      $body = mime_fetch_body ($imap_stream, $id, $ent_id);
      echo decodeBody($body, $encoding);
  }
  183. /* -[ END MIME DECODING ]----------------------------------------------------------- */
  /**
   * Debugging aid: echoes the entity id, MIME type and parent entity id of
   * $message, then does the same for every entry of $message->entities.
   *
   * The parent id is printed as an empty string when the entity has no
   * parent object, instead of dereferencing a missing parent.
   *
   * @param object $message message object with entity_id, type0, type1,
   *                        parent and entities properties
   * @return void
   */
  function listEntities ($message) {
      if (!$message) {
          return;
      }
      $parent_id = '';
      if (isset($message->parent) && is_object($message->parent)) {
          $parent_id = $message->parent->entity_id;
      }
      echo "<tt>" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $parent_id . '<br>';
      for ($i = 0; isset($message->entities[$i]); $i++) {
          echo "$i : ";
          /* The function never returns a value, so the former
           * "return: " branch on the recursion result could not fire.
           */
          listEntities($message->entities[$i]);
      }
  }
  /**
   * Translates a priority header value into a localized label.
   *
   * Only the first character of $priority is inspected: "1" and "2" give
   * "High", "4" and "5" give "Low", anything else gives "Normal".
   *
   * @param string $priority priority header value
   * @return string the label as returned by _()
   */
  function getPriorityStr($priority) {
      $level = substr($priority, 0, 1);

      /* Higher than normal priority. */
      if ($level == '1' || $level == '2') {
          return _("High");
      }

      /* Lower than normal priority. */
      if ($level == '4' || $level == '5') {
          return _("Low");
      }

      /* "3" and every unrecognized value count as normal. */
      return _("Normal");
  }
  /**
   * Convenience wrapper: asks $message for the entity with the given id.
   *
   * @param object $message object providing a getEntity() method
   * @param mixed  $ent_id  entity id to look up
   * @return mixed whatever $message->getEntity() returns
   */
  function getEntity ($message, $ent_id) {
      $entity = $message->getEntity($ent_id);
      return $entity;
  }
  /**
   * translateText
   * Extracted from strings.php 23/03/2002
   *
   * Rewrites $body in place, line by line: long lines are handed to
   * sqWordWrap(), each line goes through charset_decode(), tabs are
   * replaced and parseUrl() is applied. Lines that start with one or more
   * "&gt;" markers (spaces between them are skipped) are wrapped in a FONT
   * tag, using $color[13] for one marker and $color[14] for several.
   * The result is enclosed in <pre> tags.
   *
   * @param string $body    text to convert (modified by reference)
   * @param int    $wrap_at column passed to sqWordWrap()
   * @param string $charset charset passed to charset_decode()
   * @return void
   */
  function translateText(&$body, $wrap_at, $charset) {
      global $where, $what; /* from searching */
      global $color; /* color theme */
      require_once(SM_PATH . 'functions/url_parser.php');

      $lines = explode("\n", $body);
      $total = count($lines);
      for ($n = 0; $n < $total; $n++) {
          $text = $lines[$n];
          if (strlen($text) - 2 >= $wrap_at) {
              sqWordWrap($text, $wrap_at);
          }
          $text = charset_decode($charset, $text);
          $text = str_replace("\t", ' ', $text);
          parseUrl ($text);

          /* Count the quote markers at the start of the line. */
          $depth = 0;
          $length = strlen($text);
          for ($at = 0; $at < $length; ) {
              if ($text[$at] == ' ') {
                  $at++;
                  continue;
              }
              if (strpos($text, '&gt;', $at) !== $at) {
                  break;
              }
              $at += 4;
              $depth++;
          }

          if ($depth > 1) {
              /* Nested quote: fall back to a default color if the theme has none. */
              if (!isset($color[14])) {
                  $color[14] = '#FF0000';
              }
              $text = '<FONT COLOR="' . $color[14] . '">' . $text . '</FONT>';
          } elseif ($depth) {
              if (!isset($color[13])) {
                  $color[13] = '#800000';
              }
              $text = '<FONT COLOR="' . $color[13] . '">' . $text . '</FONT>';
          }
          $lines[$n] = $text;
      }
      $body = '<pre>' . implode("\n", $lines) . '</pre>';
  }
  /**
   * Builds the HTML for the entity that is shown as the primary message.
   *
   * Only entities whose header type0 is "text" or "rfc822" are rendered;
   * for every other type an empty string is returned. The body is fetched,
   * decoded, passed through the "message_body" hook and then either
   * converted to text (translateText) or filtered as HTML (magicHTML),
   * depending on the entity subtype and $show_html_default. A link for
   * toggling unsafe images is appended.
   *
   * @param resource $imap_stream IMAP connection
   * @param object   $message     parsed message object
   * @param array    $color       color theme (kept for interface compatibility)
   * @param int      $wrap_at     wrap column for plain text
   * @param mixed    $ent_num     id of the entity to display
   * @param mixed    $id          message id
   * @param string   $mailbox     mailbox name
   * @return string HTML of the message body, or '' for non-text entities
   */
  function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX') {
      global $startMessage, $show_html_default, $sort, $has_unsafe_images, $passed_ent_id;
      if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
          $view_unsafe_images = false;
      }
      $body = '';
      $urlmailbox = urlencode($mailbox);
      $body_message = getEntity($message, $ent_num);
      /* This if statement checks for the entity to show as the
       * primary message. To add more of them, just put them in the
       * order that is their priority.
       */
      if (($body_message->header->type0 == 'text') ||
          ($body_message->header->type0 == 'rfc822')) {
          $body = mime_fetch_body ($imap_stream, $id, $ent_num);
          $body = decodeBody($body, $body_message->header->encoding);
          $hookResults = do_hook("message_body", $body);
          $body = $hookResults[1];
          /* If there are other types that shouldn't be formatted, add
           * them here.
           */
          if ($body_message->header->type1 == 'html') {
              if ($show_html_default <> 1) {
                  /* Reduce the HTML part to plain text before formatting it. */
                  $entity_conv = array('&nbsp;' => ' ',
                                       '<p>'    => "\n",
                                       '<br>'   => "\n",
                                       '<P>'    => "\n",
                                       '<BR>'   => "\n",
                                       '&gt;'   => '>',
                                       '&lt;'   => '<');
                  $body = strtr($body, $entity_conv);
                  $body = strip_tags($body);
                  $body = trim($body);
                  translateText($body, $wrap_at,
                                $body_message->header->getParameter('charset'));
              } else {
                  $body = magicHTML($body, $id, $message, $mailbox);
              }
          } else {
              translateText($body, $wrap_at,
                            $body_message->header->getParameter('charset'));
          }
          /* Every value placed in the query string is URL-encoded so that a
           * crafted request value cannot break out of the href attribute.
           */
          $link = 'read_body.php?passed_id=' . urlencode((string) $id) .
                  '&amp;ent_id=' . urlencode((string) $ent_num) .
                  '&amp;mailbox=' . $urlmailbox .
                  '&amp;sort=' . urlencode((string) $sort) .
                  '&amp;startMessage=' . urlencode((string) $startMessage) .
                  '&amp;show_more=0';
          if (isset($passed_ent_id)) {
              $link .= '&amp;passed_ent_id=' . urlencode((string) $passed_ent_id);
          }
          if ($view_unsafe_images) {
              $text = _("Hide Unsafe Images");
          } else {
              if (isset($has_unsafe_images) && $has_unsafe_images) {
                  $link .= '&amp;view_unsafe_images=1';
                  $text = _("View Unsafe Images");
              } else {
                  $text = '';
              }
          }
          $body .= '<center><small><a href="'.$link.'">'.$text.
                   '</a></small></center><br>' . "\n";
      }
      return $body;
  }
  /**
   * Builds the HTML table rows that list the attachments of a message.
   *
   * For each attachment returned by $message->getAttachments() a row with
   * a default link, size, MIME type, description and the action links
   * provided by the "attachment <type>" hooks is produced.
   *
   * @param object $message    parsed message object
   * @param mixed  $exclude_id passed through to getAttachments()
   * @param string $mailbox    mailbox name
   * @param mixed  $id         message id
   * @return string HTML rows, or '' when there are no attachments
   */
  function formatAttachments($message, $exclude_id, $mailbox, $id) {
      /* $passed_ent_id must be declared global here; without that the
       * isset() test below could never succeed.
       */
      global $where, $what, $startMessage, $passed_ent_id;
      $att_ar = $message->getAttachments($exclude_id);
      if (!count($att_ar)) return '';
      $attachments = '';
      $urlMailbox = urlencode($mailbox);
      if (isset($passed_ent_id)) {
          $passed_ent_id_link = '&amp;passed_ent_id='.$passed_ent_id;
      } else {
          $passed_ent_id_link = '';
      }
      foreach ($att_ar as $att) {
          $ent = urldecode($att->entity_id);
          $header = $att->header;
          $type0 = strtolower($header->type0);
          $type1 = strtolower($header->type1);
          $links = array();
          $links['download link']['text'] = _("download");
          $links['download link']['href'] =
              "../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;ent_id=$ent";
          if ($type0 =='message' && $type1 == 'rfc822') {
              /* Attached message: show its subject and sender. */
              $default_page = '../src/read_body.php';
              $rfc822_header = $att->rfc822_header;
              $filename = decodeHeader($rfc822_header->subject);
              if (trim( $filename ) == '') {
                  $filename = 'untitled-[' . $ent . ']' ;
              }
              $from_o = $rfc822_header->from;
              if (is_object($from_o)) {
                  $from_name = $from_o->getAddress(false);
              } else {
                  $from_name = _("Unknown sender");
              }
              $from_name = decodeHeader(htmlspecialchars($from_name));
              $description = $from_name;
          } else {
              $default_page = '../src/download.php';
              /* Name lookup order: disposition "filename", disposition
               * "name", content-type "name", then a placeholder built from
               * the content id or the entity id.
               */
              $filename = '';
              if (is_object($header->disposition)) {
                  $filename = decodeHeader($header->disposition->getProperty('filename'));
                  if (trim($filename) == '') {
                      $filename = decodeHeader($header->disposition->getProperty('name'));
                  }
              }
              if (trim($filename) == '') {
                  $filename = decodeHeader($header->getParameter('name'));
              }
              if (trim($filename) == '') {
                  if (trim( $header->id ) == '') {
                      $filename = 'untitled-[' . $ent . ']' ;
                  } else {
                      $filename = 'cid: ' . $header->id;
                  }
              }
              if ($header->description) {
                  $description = htmlspecialchars($header->description);
              } else {
                  $description = '';
              }
          }
          /* NOTE(review): the file name is written into the page without
           * htmlspecialchars(); whether decodeHeader() output is already
           * safe for HTML depends on charset_decode() -- confirm.
           */
          $display_filename = $filename;
          $defaultlink = $default_page . "?startMessage=$startMessage"
                       . "&amp;passed_id=$id&amp;mailbox=$urlMailbox"
                       . '&amp;ent_id='.$ent.$passed_ent_id_link.'&amp;absolute_dl=true';
          if (!empty($where) && !empty($what)) {
              $defaultlink .= '&amp;where='. urlencode($where).'&amp;what='.urlencode($what);
          }
          /* This executes the attachment hook with a specific MIME-type.
           * If that doesn't have results, it tries if there's a rule
           * for a more generic type.
           */
          $hookresults = do_hook("attachment $type0/$type1", $links,
              $startMessage, $id, $urlMailbox, $ent, $defaultlink,
              $display_filename, $where, $what);
          if(count($hookresults[1]) <= 1) {
              $hookresults = do_hook("attachment $type0/*", $links,
                  $startMessage, $id, $urlMailbox, $ent, $defaultlink,
                  $display_filename, $where, $what);
          }
          $links = $hookresults[1];
          $defaultlink = $hookresults[6];
          $attachments .= '<TR><TD>' .
                          "<A HREF=\"$defaultlink\">$display_filename</A>&nbsp;</TD>" .
                          '<TD><SMALL><b>' . show_readable_size($header->size) .
                          '</b>&nbsp;&nbsp;</small></TD>' .
                          "<TD><SMALL>[ $type0/$type1 ]&nbsp;</SMALL></TD>" .
                          '<TD><SMALL>';
          $attachments .= '<b>' . $description . '</b>';
          $attachments .= '</SMALL></TD><TD><SMALL>&nbsp;';
          /* Action links, separated by " | ". */
          $first = true;
          foreach ($links as $val) {
              if ($first) {
                  $first = false;
              } else {
                  $attachments .= '&nbsp;&nbsp;|&nbsp;&nbsp;';
              }
              $attachments .= '<a href="' . $val['href'] . '">' . $val['text'] . '</a>';
          }
          $attachments .= "</TD></TR>\n";
      }
      return $attachments;
  }
  /**
   * Decodes a message body according to its transfer encoding.
   *
   * CRLF line ends are converted to LF first. "quoted-printable" (also
   * spelled "quoted_printable") and "base64" are decoded; every other
   * encoding is returned as is. If the active language defines an
   * XTRA_CODE function, the result is passed through it with 'decode'.
   *
   * @param string $body     encoded body text
   * @param string $encoding transfer encoding name (case-insensitive)
   * @return string the decoded body
   */
  function decodeBody($body, $encoding) {
      global $languages, $squirrelmail_language;
      $body = str_replace("\r\n", "\n", $body);
      $encoding = strtolower($encoding);
      if ($encoding == 'quoted-printable' ||
          $encoding == 'quoted_printable') {
          $body = quoted_printable_decode($body);
          /* Strip soft line breaks that are still present. Removing one can
           * form a new "=\n" pair, hence the loop. Plain string functions
           * replace the ereg() calls, which no longer exist in PHP 7+.
           */
          while (strpos($body, "=\n") !== false) {
              $body = str_replace("=\n", '', $body);
          }
      } else if ($encoding == 'base64') {
          $body = base64_decode($body);
      }
      if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
          function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
          $body = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $body);
      }
      // All other encodings are returned raw.
      return $body;
  }
  /**
   * Callback for decodeHeader(): turns one "=XX" hex escape into its byte.
   *
   * @param array $match preg match; $match[1] holds the two hex digits
   * @return string the decoded single byte
   */
  function sq_decodeheader_hexchar($match) {
      return chr(hexdec($match[1]));
  }

  /**
   * Decodes strings that are encoded according to
   * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
   * Patched by Christian Schmidt <christian@ostenfeld.dk> 23/03/2002
   *
   * Each "=?charset?Q|B?text?=" word is replaced by its decoded text.
   * White-space that separates two consecutive encoded words is dropped.
   * Arrays are joined with "\n" before decoding.
   *
   * @param mixed $string    header value (string or array of lines)
   * @param bool  $utfencode when true the decoded text of every encoded
   *                         word is passed through charset_decode()
   * @return string the decoded header
   */
  function decodeHeader ($string, $utfencode=true) {
      global $languages, $squirrelmail_language;
      if (is_array($string)) {
          $string = implode("\n", $string);
      }
      if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
          function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
          $string = $languages[$squirrelmail_language]['XTRA_CODE']('decodeheader', $string);
      }
      /* $i is the number of bytes that are already decoded and must be
       * skipped. The "s" modifier lets "." match the "\n" inserted above;
       * without it decoding stopped at the first line break.
       */
      $i = 0;
      while (preg_match('/^(.{' . $i . '})(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=/Uis',
                        $string, $res)) {
          $prefix = $res[1];
          /* Ignore white-space between consecutive encoded-words. */
          if (strspn($res[2], " \t") != strlen($res[2])) {
              $prefix .= $res[2];
          }
          if (strtoupper($res[4]) == 'B') {
              $replace = base64_decode($res[5]);
          } else {
              $replace = str_replace('_', ' ', $res[5]);
              /* The /e modifier was removed in PHP 7; use a callback. */
              $replace = preg_replace_callback('/=([0-9a-f]{2})/i',
                                               'sq_decodeheader_hexchar',
                                               $replace);
          }
          /* Only encode into entities by default. Some places
           * don't need the encoding, like the compose form.
           * This now covers B-encoded words as well as Q-encoded ones.
           */
          if ($utfencode) {
              $replace = charset_decode($res[3], $replace);
          }
          $string = $prefix . $replace . substr($string, strlen($res[0]));
          $i = strlen($prefix) + strlen($replace);
      }
      return $string;
  }
  /**
   * Encode a string according to RFC 1522 for use in headers if it
   * contains 8-bit characters or anything that looks like it should
   * be encoded.
   *
   * If the active language defines an XTRA_CODE function, encoding is
   * delegated to it. Otherwise the string is returned unchanged unless it
   * contains "=?" or a byte above 126, in which case it is returned as a
   * single "=?$default_charset?Q?...?=" word.
   *
   * @param string $string header text
   * @return string the original or the Q-encoded text
   */
  function encodeHeader ($string) {
      global $default_charset, $languages, $squirrelmail_language;
      if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
          function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
          return $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string);
      }
      // Encode only if the string contains 8-bit characters or =?
      $must_encode = (strpos($string, '=?') !== false);
      $length = strlen($string);
      $ret = '';
      for ($i = 0; $i < $length; ++$i) {
          /* Square brackets: the {} offset syntax no longer parses on PHP 8. */
          $char = $string[$i];
          switch ($char) {
              case '=':
                  $ret .= '=3D';
                  break;
              case '?':
                  $ret .= '=3F';
                  break;
              case '_':
                  $ret .= '=5F';
                  break;
              case ' ':
                  $ret .= '_';
                  break;
              default:
                  $k = ord($char);
                  if ($k > 126) {
                      $ret .= sprintf("=%02X", $k);
                      $must_encode = true;
                  } else {
                      $ret .= $char;
                  }
                  break;
          }
      }
      if ($must_encode) {
          $string = "=?$default_charset?Q?$ret?=";
      }
      return $string;
  }
  /**
   * Tries to locate the entity id of the MIME element whose header id
   * equals $id (compared case-insensitively).
   *
   * Entities of type "multipart" are searched recursively; the search
   * stops as soon as a non-empty result is available.
   *
   * @param string $id      content id to look for
   * @param object $message message object with an entities array
   * @return mixed the matching entity_id, or '' when nothing matched
   */
  function find_ent_id($id, $message) {
      $found = '';
      $total = count($message->entities);
      $index = 0;
      while ($found == '' && $index < $total) {
          $entity = $message->entities[$index];
          $index++;
          if ($entity->header->type0 == 'multipart') {
              /* Descend into the container and keep its result. */
              $found = find_ent_id($id, $entity);
              continue;
          }
          if (strcasecmp($entity->header->id, $id) == 0) {
              return $entity->entity_id;
          }
      }
      return $found;
  }
  /**
   * Tells whether the file name of $message ends in one of the image
   * extensions jpg, jpeg, gif, png or bmp (case-insensitive).
   *
   * A name without a dot has no extension and is rejected; previously the
   * failed strrpos() result was used as an offset, so a name such as
   * "xpng" was accepted.
   *
   * @param object $message object providing getFilename()
   * @return bool true when the extension is in the list
   */
  function sq_check_save_extension($message) {
      $filename = $message->getFilename();
      $dot = strrpos($filename, '.');
      if ($dot === false) {
          return false;
      }
      $ext = strtolower((string) substr($filename, $dot + 1));
      $save_extensions = array('jpg','jpeg','gif','png','bmp');
      return in_array($ext, $save_extensions, true);
  }
  584. /**
  585. ** HTMLFILTER ROUTINES
  586. */
  /**
   * This function returns the final tag out of the tag name, an array
   * of attributes, and the type of the tag. This function is called by
   * sq_sanitize internally.
   *
   * Attribute values are emitted exactly as stored (name=value), so they
   * are expected to carry their own quotes.
   *
   * @param $tagname the name of the tag.
   * @param $attary the array of attributes and their values
   * @param $tagtype The type of the tag: 2 is a closing tag, 3 an
   *                 XHTML-style self-closing tag, anything else an
   *                 opening tag.
   * @return a string with the final tag representation.
   */
  function sq_tagprint($tagname, $attary, $tagtype){
      if ($tagtype == 2){
          return '</' . $tagname . '>';
      }
      $fulltag = '<' . $tagname;
      if (is_array($attary) && sizeof($attary)){
          $atts = array();
          /* foreach replaces each(), which was removed in PHP 8, and does
           * not depend on the array's internal pointer position.
           */
          foreach ($attary as $attname => $attvalue){
              $atts[] = "$attname=$attvalue";
          }
          $fulltag .= ' ' . join(" ", $atts);
      }
      if ($tagtype == 3){
          $fulltag .= ' /';
      }
      return $fulltag . '>';
  }
  /**
   * Lower-cases a value in place. Intended as an array_walk() callback.
   *
   * @param $val the value to normalize, passed by reference.
   * @return void; the result is written back into $val.
   */
  function sq_casenormalize(&$val){
      $lowered = strtolower($val);
      $val = $lowered;
  }
  /**
   * This function skips any whitespace from the current position within
   * a string and to the next non-whitespace value.
   *
   * @param $body the string
   * @param $offset the offset within the string where we should start
   * looking for the next non-whitespace character.
   * @return the location within the $body where the next
   * non-whitespace char is located ($offset itself when the
   * character at $offset is not whitespace).
   */
  function sq_skipspace($body, $offset){
      /* The length of the leading whitespace run is taken from the match
       * itself; the former sizeof() call on a string fails on PHP 8.
       */
      if (preg_match('/^\s+/', (string) substr($body, $offset), $matches)){
          $offset += strlen($matches[0]);
      }
      return $offset;
  }
  /**
   * This function looks for the next character within a string. It's
   * really just a glorified "strpos", except it catches failures
   * nicely.
   *
   * @param $body The string to look for needle in.
   * @param $offset Start looking from this position.
   * @param $needle The character/string to look for.
   * @return location of the next occurrence of the needle, or
   * strlen($body) if needle wasn't found.
   */
  function sq_findnxstr($body, $offset, $needle){
      $length = strlen($body);
      /* An offset past the end cannot match anything; on PHP 8 strpos()
       * would throw for it instead of returning false.
       */
      if ($offset > $length){
          return $length;
      }
      $pos = strpos($body, $needle, $offset);
      return ($pos === false) ? $length : $pos;
  }
  /**
   * This function takes a PCRE-style regexp and tries to match it
   * within the string.
   *
   * The expression is embedded between "%" delimiters, so it must not
   * contain an unescaped "%".
   *
   * @param $body The string to look for needle in.
   * @param $offset Start looking from here.
   * @param $reg A PCRE-style regex to match.
   * @return Returns a false if no matches found (or if the match is
   * empty), or an array with the following members:
   * - integer with the location of the match within $body
   * - string with whatever content between offset and the match
   * - string with whatever it is we matched
   */
  function sq_findnxreg($body, $offset, $reg){
      $matches = array();
      /* Rely on preg_match()'s return value. Testing the truthiness of the
       * matched text treated a literal "0" as "no match" and read an
       * undefined index when nothing matched.
       */
      if (!preg_match("%^(.*?)($reg)%s", (string) substr($body, $offset), $matches)){
          return false;
      }
      if ($matches[0] === ''){
          return false;
      }
      return array($offset + strlen($matches[1]), $matches[1], $matches[2]);
  }
  692. /**
  693. * This function looks for the next tag.
  694. *
  695. * @param $body String where to look for the next tag.
  696. * @param $offset Start looking from here.
  697. * @return false if no more tags exist in the body, or
  698. * an array with the following members:
  699. * - string with the name of the tag
  700. * - array with attributes and their values
  701. * - integer with tag type (1, 2, or 3)
  702. * - integer where the tag starts (starting "<")
  703. * - integer where the tag ends (ending ">")
  704. * first three members will be false, if the tag is invalid.
  705. */
  706. function sq_getnxtag($body, $offset){
  707. $me = 'sq_getnxtag';
  708. if ($offset > strlen($body)){
  709. return false;
  710. }
  711. $lt = sq_findnxstr($body, $offset, "<");
  712. if ($lt == strlen($body)){
  713. return false;
  714. }
  715. /**
  716. * We are here:
  717. * blah blah <tag attribute="value">
  718. * \---------^
  719. */
  720. $pos = sq_skipspace($body, $lt+1);
  721. if ($pos >= strlen($body)){
  722. return Array(false, false, false, $lt, strlen($body));
  723. }
  724. /**
  725. * There are 3 kinds of tags:
  726. * 1. Opening tag, e.g.:
  727. * <a href="blah">
  728. * 2. Closing tag, e.g.:
  729. * </a>
  730. * 3. XHTML-style content-less tag, e.g.:
  731. * <img src="blah"/>
  732. */
  733. $tagtype = false;
  734. switch (substr($body, $pos, 1)){
  735. case '/':
  736. $tagtype = 2;
  737. $pos++;
  738. break;
  739. case '!':
  740. /**
  741. * A comment or an SGML declaration.
  742. */
  743. if (substr($body, $pos+1, 2) == "--"){
  744. $gt = strpos($body, "-->", $pos);
  745. if ($gt === false){
  746. $gt = strlen($body);
  747. } else {
  748. $gt += 2;
  749. }
  750. return Array(false, false, false, $lt, $gt);
  751. } else {
  752. $gt = sq_findnxstr($body, $pos, ">");
  753. return Array(false, false, false, $lt, $gt);
  754. }
  755. break;
  756. default:
  757. /**
  758. * Assume tagtype 1 for now. If it's type 3, we'll switch values
  759. * later.
  760. */
  761. $tagtype = 1;
  762. break;
  763. }
  764. $tag_start = $pos;
  765. $tagname = '';
  766. /**
  767. * Look for next [\W-_], which will indicate the end of the tag name.
  768. */
  769. $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
  770. if ($regary == false){
  771. return Array(false, false, false, $lt, strlen($body));
  772. }
  773. list($pos, $tagname, $match) = $regary;
  774. $tagname = strtolower($tagname);
  775. /**
  776. * $match can be either of these:
  777. * '>' indicating the end of the tag entirely.
  778. * '\s' indicating the end of the tag name.
  779. * '/' indicating that this is type-3 xhtml tag.
  780. *
  781. * Whatever else we find there indicates an invalid tag.
  782. */
  783. switch ($match){
  784. case '/':
  785. /**
  786. * This is an xhtml-style tag with a closing / at the
  787. * end, like so: <img src="blah"/>. Check if it's followed
  788. * by the closing bracket. If not, then this tag is invalid
  789. */
  790. if (substr($body, $pos, 2) == "/>"){
  791. $pos++;
  792. $tagtype = 3;
  793. } else {
  794. $gt = sq_findnxstr($body, $pos, ">");
  795. $retary = Array(false, false, false, $lt, $gt);
  796. return $retary;
  797. }
  798. case '>':
  799. return Array($tagname, false, $tagtype, $lt, $pos);
  800. break;
  801. default:
  802. /**
  803. * Check if it's whitespace
  804. */
  805. if (!preg_match('/\s/', $match)){
  806. /**
  807. * This is an invalid tag! Look for the next closing ">".
  808. */
  809. $gt = sq_findnxstr($body, $offset, ">");
  810. return Array(false, false, false, $lt, $gt);
  811. }
  812. break;
  813. }
  814. /**
  815. * At this point we're here:
  816. * <tagname attribute='blah'>
  817. * \-------^
  818. *
  819. * At this point we loop in order to find all attributes.
  820. */
  821. $attname = '';
  822. $atttype = false;
  823. $attary = Array();
  824. while ($pos <= strlen($body)){
  825. $pos = sq_skipspace($body, $pos);
  826. if ($pos == strlen($body)){
  827. /**
  828. * Non-closed tag.
  829. */
  830. return Array(false, false, false, $lt, $pos);
  831. }
  832. /**
  833. * See if we arrived at a ">" or "/>", which means that we reached
  834. * the end of the tag.
  835. */
  836. $matches = Array();
  837. if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) {
  838. /**
  839. * Yep. So we did.
  840. */
  841. $pos += strlen($matches{1});
  842. if ($matches{2} == "/>"){
  843. $tagtype = 3;
  844. $pos++;
  845. }
  846. return Array($tagname, $attary, $tagtype, $lt, $pos);
  847. }
  848. /**
  849. * There are several types of attributes, with optional
  850. * [:space:] between members.
  851. * Type 1:
  852. * attrname[:space:]=[:space:]'CDATA'
  853. * Type 2:
  854. * attrname[:space:]=[:space:]"CDATA"
  855. * Type 3:
  856. * attr[:space:]=[:space:]CDATA
  857. * Type 4:
  858. * attrname
  859. *
  860. * We leave types 1 and 2 the same, type 3 we check for
  861. * '"' and convert to "&quot" if needed, then wrap in
  862. * double quotes. Type 4 we convert into:
  863. * attrname="yes".
  864. */
  865. $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
  866. if ($regary == false){
  867. /**
  868. * Looks like body ended before the end of tag.
  869. */
  870. return Array(false, false, false, $lt, strlen($body));
  871. }
  872. list($pos, $attname, $match) = $regary;
  873. $attname = strtolower($attname);
  874. /**
  875. * We arrived at the end of attribute name. Several things possible
  876. * here:
  877. * '>' means the end of the tag and this is attribute type 4
  878. * '/' if followed by '>' means the same thing as above
  879. * '\s' means a lot of things -- look what it's followed by.
  880. * anything else means the attribute is invalid.
  881. */
  882. switch($match){
  883. case '/':
  884. /**
  885. * This is an xhtml-style tag with a closing / at the
  886. * end, like so: <img src="blah"/>. Check if it's followed
  887. * by the closing bracket. If not, then this tag is invalid
  888. */
  889. if (substr($body, $pos, 2) == "/>"){
  890. $pos++;
  891. $tagtype = 3;
  892. } else {
  893. $gt = sq_findnxstr($body, $pos, ">");
  894. $retary = Array(false, false, false, $lt, $gt);
  895. return $retary;
  896. }
  897. case '>':
  898. $attary{$attname} = '"yes"';
  899. return Array($tagname, $attary, $tagtype, $lt, $pos);
  900. break;
  901. default:
  902. /**
  903. * Skip whitespace and see what we arrive at.
  904. */
  905. $pos = sq_skipspace($body, $pos);
  906. $char = substr($body, $pos, 1);
  907. /**
  908. * Two things are valid here:
  909. * '=' means this is attribute type 1 2 or 3.
  910. * \w means this was attribute type 4.
  911. * anything else we ignore and re-loop. End of tag and
  912. * invalid stuff will be caught by our checks at the beginning
  913. * of the loop.
  914. */
  915. if ($char == "="){
  916. $pos++;
  917. $pos = sq_skipspace($body, $pos);
  918. /**
  919. * Here are 3 possibilities:
  920. * "'" attribute type 1
  921. * '"' attribute type 2
  922. * everything else is the content of tag type 3
  923. */
  924. $quot = substr($body, $pos, 1);
  925. if ($quot == "'"){
  926. $regary = sq_findnxreg($body, $pos+1, "\'");
  927. if ($regary == false){
  928. return Array(false, false, false, $lt, strlen($body));
  929. }
  930. list($pos, $attval, $match) = $regary;
  931. $pos++;
  932. $attary{$attname} = "'" . $attval . "'";
  933. } else if ($quot == '"'){
  934. $regary = sq_findnxreg($body, $pos+1, '\"');
  935. if ($regary == false){
  936. return Array(false, false, false, $lt, strlen($body));
  937. }
  938. list($pos, $attval, $match) = $regary;
  939. $pos++;
  940. $attary{$attname} = '"' . $attval . '"';
  941. } else {
  942. /**
  943. * These are hateful. Look for \s, or >.
  944. */
  945. $regary = sq_findnxreg($body, $pos, "[\s>]");
  946. if ($regary == false){
  947. return Array(false, false, false, $lt, strlen($body));
  948. }
  949. list($pos, $attval, $match) = $regary;
  950. /**
  951. * If it's ">" it will be caught at the top.
  952. */
  953. $attval = preg_replace("/\"/s", "&quot;", $attval);
  954. $attary{$attname} = '"' . $attval . '"';
  955. }
  956. } else if (preg_match("|[\w/>]|", $char)) {
  957. /**
  958. * That was attribute type 4.
  959. */
  960. $attary{$attname} = '"yes"';
  961. } else {
  962. /**
  963. * An illegal character. Find next '>' and return.
  964. */
  965. $gt = sq_findnxstr($body, $pos, ">");
  966. return Array(false, false, false, $lt, $gt);
  967. }
  968. break;
  969. }
  970. }
  971. /**
  972. * The fact that we got here indicates that the tag end was never
  973. * found. Return invalid tag indication so it gets stripped.
  974. */
  975. return Array(false, false, false, $lt, strlen($body));
  976. }
  /**
   * This function checks attribute values for entity-encoded values
   * and returns them translated into plain characters so we can run
   * checks on them.
   *
   * Named entities are handled first, then numeric entities (decimal
   * and hexadecimal) for codes 1 to 255. Double and single quotes are
   * deliberately left encoded (&quot;, &#34;, &#39;) so that a decoded
   * quote cannot end the attribute value early.
   *
   * An entity is recognised when it is terminated by ";" or when the
   * character that follows cannot be part of the entity itself. The
   * terminating ";" is consumed; the following character is not.
   *
   * @param  $attvalue A string to run entity check against.
   * @return Translated value.
   */
  function sq_deent($attvalue){
      /**
       * See if we have to run the checks first. All entities must start
       * with "&".
       */
      if (strpos($attvalue, "&") === false){
          return $attvalue;
      }
      /**
       * Check named entities first. ENT_COMPAT is passed explicitly so
       * the table never carries an entity for the single quote, whatever
       * the default flags of the running PHP version are.
       */
      $trans = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));
      /**
       * Leave &quot; in, as it can mess us up.
       */
      unset($trans["&quot;"]);
      foreach ($trans as $ent => $val){
          $name = preg_quote(rtrim($ent, ';'), '/');
          /**
           * The replacement is escaped because "\" and "$" have a
           * special meaning in a preg_replace() replacement string.
           */
          $attvalue = preg_replace("/$name(?:;|(?!\w))/si",
                                   addcslashes($val, '\\$'),
                                   $attvalue);
      }
      /**
       * Now translate numbered entities from 1 to 255 if needed.
       * 34 (") and 39 (') are skipped on purpose.
       */
      if (strpos($attvalue, "#") !== false){
          $omit = array(34, 39);
          for ($asc = 1; $asc < 256; $asc++){
              if (in_array($asc, $omit, true)){
                  continue;
              }
              $chr = addcslashes(chr($asc), '\\$');
              $hex = dechex($asc);
              /**
               * The negative look-ahead stops "&#10" from matching the
               * first part of "&#106;" (and likewise for hex digits).
               */
              $attvalue = preg_replace("/&#0*$asc(?:;|(?!\d))/si",
                                       $chr, $attvalue);
              $attvalue = preg_replace("/&#x0*$hex(?:;|(?![0-9a-f]))/si",
                                       $chr, $attvalue);
          }
      }
      return $attvalue;
  }
  /**
   * This function runs various checks against the attributes of one tag.
   *
   * For every attribute it:
   *  1. drops the attribute if its name matches $rm_attnames for this tag,
   *  2. decodes entities in the value with sq_deent(),
   *  3. rewrites the value if it matches one of the $bad_attvals patterns,
   *  4. turns a value starting with cid: into a download link.
   * Finally the attributes listed in $add_attr_to_tag are merged in.
   *
   * @param  $tagname         String with the name of the tag.
   * @param  $attary          Array with all tag attributes.
   * @param  $rm_attnames     See description for sq_sanitize
   * @param  $bad_attvals     See description for sq_sanitize
   * @param  $add_attr_to_tag See description for sq_sanitize
   * @param  $message         message object
   * @param  $id              message id
   * @param  $mailbox         mailbox name, passed on to sq_cid2http
   * @return Array with modified attributes.
   */
  function sq_fixatts($tagname,
                      $attary,
                      $rm_attnames,
                      $bad_attvals,
                      $add_attr_to_tag,
                      $message,
                      $id,
                      $mailbox
                      ){
      foreach ($attary as $attname => $attvalue){
          /**
           * See if this attribute should be removed.
           */
          foreach ($rm_attnames as $matchtag => $matchattrs){
              if (preg_match($matchtag, $tagname)){
                  foreach ($matchattrs as $matchattr){
                      if (preg_match($matchattr, $attname)){
                          unset($attary[$attname]);
                          /**
                           * Jump straight to the next attribute. A plain
                           * "continue" would only leave the innermost
                           * loop, and the checks below could then put
                           * the removed attribute back.
                           */
                          continue 3;
                      }
                  }
              }
          }
          /**
           * Remove any entities.
           */
          $attvalue = sq_deent($attvalue);
          /**
           * Now let's run checks on the attvalues. Each entry of
           * $bad_attvals maps a tag-name pattern to a list of
           * attribute-name patterns, each of which carries a pair of
           * arrays: value patterns and their replacements.
           */
          foreach ($bad_attvals as $matchtag => $matchattrs){
              if (preg_match($matchtag, $tagname)){
                  foreach ($matchattrs as $matchattr => $valary){
                      if (preg_match($matchattr, $attname)){
                          /**
                           * There are two arrays in valary.
                           * First is matches.
                           * Second one is replacements
                           */
                          list($valmatch, $valrepl) = $valary;
                          $newvalue =
                              preg_replace($valmatch, $valrepl, $attvalue);
                          if ($newvalue !== $attvalue){
                              $attary[$attname] = $newvalue;
                          }
                      }
                  }
              }
          }
          /**
           * Turn cid: urls into http-friendly ones.
           */
          if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
              $attary[$attname] = sq_cid2http($message, $id, $attvalue, $mailbox);
          }
      }
      /**
       * See if we need to append any attributes to this tag.
       */
      foreach ($add_attr_to_tag as $matchtag => $addattary){
          if (preg_match($matchtag, $tagname)){
              $attary = array_merge($attary, $addattary);
          }
      }
      return $attary;
  }
  /**
   * This function edits the style definition to make them friendly and
   * usable in squirrelmail.
   *
   * It renames the "body" selector to ".bodyclass", replaces quoted
   * url() values that point at a *script: scheme (and, unless
   * $view_unsafe_images is set, at http/https) with the "removed"
   * placeholder image, rewrites cid: urls through sq_cid2http(), and
   * neutralises the expression/behavior/binding keywords.
   *
   * NOTE(review): every url() pattern below requires a quote right after
   * "url(", so unquoted forms such as url(javascript:...) are not
   * touched by this function.
   *
   * @param  $message the message object
   * @param  $id      the message id
   * @param  $content a string with whatever is between <style> and </style>
   * @param  $mailbox optional mailbox name used for cid: download links
   * @return a string with edited content.
   */
  function sq_fixstyle($message, $id, $content, $mailbox = 'INBOX'){
      global $view_unsafe_images;
      /**
       * First look for general BODY style declaration, which would be
       * like so:
       * body {background: blah-blah}
       * and change it to .bodyclass so we can just assign it to a <div>
       */
      $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
      $secremoveimg = '../images/' . _("sec_remove_eng.png");
      /**
       * Fix url('blah') declarations.
       */
      $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si",
                              "url(\\1$secremoveimg\\2)", $content);
      /**
       * Fix url('https*://.*) declarations but only if $view_unsafe_images
       * is false.
       */
      if (!$view_unsafe_images){
          $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si",
                                  "url(\\1$secremoveimg\\2)", $content);
      }
      /**
       * Fix urls that refer to cid:
       *
       * The matched text is replaced literally. Feeding it back into a
       * regular expression would fail to match as soon as the url holds
       * a metacharacter, and this loop would then never terminate.
       */
      while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content,
                        $matches)){
          $httpurl = sq_cid2http($message, $id, $matches[1], $mailbox);
          $content = str_replace($matches[0], "url($httpurl)", $content);
      }
      /**
       * Fix stupid css declarations which lead to vulnerabilities
       * in IE.
       */
      $match   = array('/expression/si',
                       '/behaviou*r/si',
                       '/binding/si');
      $replace = array('idiocy', 'idiocy', 'idiocy');
      $content = preg_replace($match, $replace, $content);
      return $content;
  }
  /**
   * This function converts cid: url's into the ones that can be viewed in
   * the browser.
   *
   * The first character of $cidurl is taken to be the quote character;
   * every occurrence of it is removed, the leading "cid:" is cut off and
   * the rest is looked up with find_ent_id(). When an entity is found the
   * result is a download.php link wrapped in that same quote character.
   *
   * @param  $message the message object
   * @param  $id      the message id
   * @param  $cidurl  the cid: url, including its surrounding quotes.
   * @param  $mailbox the mailbox name put into the link
   * @return a string with a http-friendly url, or an empty string when
   *         find_ent_id() reports no matching entity
   */
  function sq_cid2http($message, $id, $cidurl, $mailbox){
      /**
       * Get rid of quotes, then of the "cid:" prefix.
       */
      $quote     = substr($cidurl, 0, 1);
      $contentid = substr(trim(str_replace($quote, "", $cidurl)), 4);
      $entity    = find_ent_id($contentid, $message);
      /**
       * No matching entity: hand back an empty url. (A "blocked link"
       * image could be substituted here instead.)
       */
      if (!$entity) {
          return '';
      }
      return $quote . '../src/download.php?absolute_dl=true&amp;' .
             "passed_id=$id&amp;mailbox=" . urlencode($mailbox) .
             '&amp;ent_id=' . $entity . $quote;
  }
  /**
   * This function changes the <body> tag into a <div> tag since we
   * can't really have a body-within-body.
   *
   * The div always gets class 'bodyclass'. The background, bgcolor and
   * text attributes of <body> are turned into an inline style; all other
   * attributes are dropped.
   *
   * Both quote characters are stripped from each value before it is put
   * into the style string. The style is emitted inside double quotes and
   * the background url inside single quotes, so a leftover quote of
   * either kind would end the attribute or the url() early.
   *
   * NOTE(review): apart from quote stripping the values are copied into
   * the style as they are; ";" or css keywords in them are not filtered
   * here.
   *
   * @param  $attary an array of attributes and values of <body>
   * @return a modified array of attributes to be set for <div>
   */
  function sq_body2div($attary){
      $divattary = array('class' => "'bodyclass'");
      $styledef = '';
      if (is_array($attary) && sizeof($attary) > 0){
          foreach ($attary as $attname => $attvalue){
              $attvalue = str_replace(array('"', "'"), '', $attvalue);
              /**
               * Cast the name so an integer array key can never be
               * loosely equal to one of the case labels.
               */
              switch ((string) $attname){
              case 'background':
                  $styledef .= "background-image: url('$attvalue'); ";
                  break;
              case 'bgcolor':
                  $styledef .= "background-color: $attvalue; ";
                  break;
              case 'text':
                  $styledef .= "color: $attvalue; ";
                  break;
              }
          }
          if (strlen($styledef) > 0){
              $divattary['style'] = "\"$styledef\"";
          }
      }
      return $divattary;
  }
  /**
   * This is the main function and the one you should actually be calling.
   * There are several variables you should be aware of an which need
   * special description.
   *
   * Since the description is quite lengthy, see it here:
   * http://www.mricon.com/html/phpfilter.html
   *
   * In short: the body is walked tag by tag with sq_getnxtag(). Text
   * between tags is copied to the output unless it sits inside a tag
   * listed in $rm_tags_with_content. Tags are dropped or kept according
   * to $tag_list, their attributes are run through sq_fixatts(), <body>
   * becomes <div>, and text inside <style> goes through sq_fixstyle().
   *
   * @param  $body                 the string with HTML you wish to filter
   * @param  $tag_list             see description above; the first element
   *                               is a flag (false: remove the listed
   *                               tags, true: allow only the listed tags)
   * @param  $rm_tags_with_content see description above
   * @param  $self_closing_tags    see description above
   * @param  $force_tag_closing    see description above
   * @param  $rm_attnames          see description above
   * @param  $bad_attvals          see description above
   * @param  $add_attr_to_tag      see description above
   * @param  $message              message object
   * @param  $id                   message id
   * @param  $mailbox              mailbox name, used for cid: links
   * @return sanitized html safe to show on your pages.
   */
  function sq_sanitize($body,
                       $tag_list,
                       $rm_tags_with_content,
                       $self_closing_tags,
                       $force_tag_closing,
                       $rm_attnames,
                       $bad_attvals,
                       $add_attr_to_tag,
                       $message,
                       $id,
                       $mailbox
                       ){
      /**
       * See if tag_list is of tags to remove or tags to allow.
       * false means remove these tags
       * true means allow these tags
       *
       * The flag has to be taken off before the list is normalized,
       * otherwise the normalizer would be run on the flag itself.
       */
      $rm_tags = array_shift($tag_list);
      /**
       * Normalize tag_list, rm_tags_with_content and self_closing_tags.
       */
      @array_walk($tag_list, 'sq_casenormalize');
      @array_walk($rm_tags_with_content, 'sq_casenormalize');
      @array_walk($self_closing_tags, 'sq_casenormalize');
      $curpos = 0;
      $open_tags = array();
      $trusted = "<!-- begin sanitized html -->\n";
      $skip_content = false;
      /**
       * Take care of netscape's stupid javascript entities like
       * &{alert('boo')};
       */
      $body = preg_replace("/&(\{.*?\};)/si", "&amp;\\1", $body);
      while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
          list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
          $free_content = substr($body, $curpos, $lt-$curpos);
          /**
           * Take care of <style>. Text is edited when it precedes a
           * closing </style>, and also whenever a <style> we printed is
           * still open. A tag placed inside the style block therefore
           * cannot shield the css in front of it from the filter.
           */
          $in_style = (isset($open_tags['style']) && $open_tags['style'] > 0);
          if ($in_style || ($tagname == "style" && $tagtype == 2)){
              $free_content = sq_fixstyle($message, $id, $free_content, $mailbox);
          }
          if ($skip_content == false){
              $trusted .= $free_content;
          }
          if ($tagname != FALSE){
              if ($tagtype == 2){
                  /**
                   * A closing tag.
                   */
                  if ($skip_content == $tagname){
                      /**
                       * Got to the end of tag we needed to remove.
                       */
                      $tagname = false;
                      $skip_content = false;
                  } else {
                      if ($skip_content == false){
                          if ($tagname == "body"){
                              $tagname = "div";
                          } else {
                              /**
                               * Only print a closing tag that matches
                               * an opening tag we let through.
                               */
                              if (isset($open_tags[$tagname]) &&
                                  $open_tags[$tagname] > 0){
                                  $open_tags[$tagname]--;
                              } else {
                                  $tagname = false;
                              }
                          }
                      }
                  }
              } else {
                  /**
                   * $rm_tags_with_content
                   */
                  if ($skip_content == false){
                      /**
                       * See if this is a self-closing type and change
                       * tagtype appropriately.
                       */
                      if ($tagtype == 1
                          && in_array($tagname, $self_closing_tags)){
                          $tagtype = 3;
                      }
                      /**
                       * See if we should skip this tag and any content
                       * inside it.
                       */
                      if ($tagtype == 1 &&
                          in_array($tagname, $rm_tags_with_content)){
                          $skip_content = $tagname;
                      } else {
                          if (($rm_tags == false
                               && in_array($tagname, $tag_list)) ||
                              ($rm_tags == true &&
                               !in_array($tagname, $tag_list))){
                              $tagname = false;
                          } else {
                              if ($tagtype == 1){
                                  if (isset($open_tags[$tagname])){
                                      $open_tags[$tagname]++;
                                  } else {
                                      $open_tags[$tagname] = 1;
                                  }
                              }
                              /**
                               * This is where we run other checks.
                               */
                              if (is_array($attary) && sizeof($attary) > 0){
                                  $attary = sq_fixatts($tagname,
                                                       $attary,
                                                       $rm_attnames,
                                                       $bad_attvals,
                                                       $add_attr_to_tag,
                                                       $message,
                                                       $id,
                                                       $mailbox
                                                       );
                              }
                              /**
                               * Convert body into div.
                               */
                              if ($tagname == "body"){
                                  $tagname = "div";
                                  $attary = sq_body2div($attary);
                              }
                          }
                      }
                  }
              }
              if ($tagname != false && $skip_content == false){
                  $trusted .= sq_tagprint($tagname, $attary, $tagtype);
              }
          }
          $curpos = $gt+1;
      }
      /**
       * Whatever follows the last tag. If a <style> was never closed
       * this text is still style content, so it gets the same edit.
       */
      $tail = substr($body, $curpos, strlen($body)-$curpos);
      if (isset($open_tags['style']) && $open_tags['style'] > 0){
          $tail = sq_fixstyle($message, $id, $tail, $mailbox);
      }
      $trusted .= $tail;
      if ($force_tag_closing == true){
          foreach ($open_tags as $tagname => $opentimes){
              while ($opentimes > 0){
                  $trusted .= '</' . $tagname . '>';
                  $opentimes--;
              }
          }
          $trusted .= "\n";
      }
      $trusted .= "<!-- end sanitized html -->\n";
      return $trusted;
  }
  /**
   * This is a wrapper function to call html sanitizing routines.
   *
   * It builds the tag and attribute rule tables, hands them to
   * sq_sanitize() and sets the global $has_unsafe_images when the
   * "removed" placeholder image shows up in the result.
   *
   * Side effects on globals: $attachment_common_show_images is set to
   * false, $view_unsafe_images is filled in through sqgetGlobalVar()
   * (or set to false when that call reports failure).
   *
   * @param  $body    the body of the message
   * @param  $id      the id of the message
   * @param  $message the message object
   * @param  $mailbox the mailbox name, 'INBOX' when omitted
   * @return a string with html safe to display in the browser.
   */
  function magicHTML($body, $id, $message, $mailbox = 'INBOX') {
      global $attachment_common_show_images, $view_unsafe_images,
             $has_unsafe_images;
      /**
       * Don't display attached images in HTML mode.
       */
      $attachment_common_show_images = false;
      /**
       * First element false: the tags listed after it are removed.
       */
      $tag_list = array(
                        false,
                        "object",
                        "meta",
                        "html",
                        "head",
                        "base",
                        "link",
                        "frame",
                        "iframe"
                        );

      $rm_tags_with_content = array(
                                    "script",
                                    "applet",
                                    "embed",
                                    "title"
                                    );

      $self_closing_tags = array(
                                 "img",
                                 "br",
                                 "hr",
                                 "input"
                                 );

      $force_tag_closing = false;

      $rm_attnames = array(
                           "/.*/" =>
                           array(
                                 "/target/si",
                                 "/^on.*/si",
                                 "/^dynsrc/si",
                                 "/^data.*/si"
                                 )
                           );

      $secremoveimg = "../images/" . _("sec_remove_eng.png");
      /**
       * NOTE(review): in "/^src|background/i" and "/^href|action/i" the
       * "^" only applies to the first alternative, so "background" and
       * "action" match anywhere in an attribute name. The keys are kept
       * as they are; they are also used for the lookups further down.
       */
      $bad_attvals = array(
          "/.*/" =>
          array(
                "/^src|background/i" =>
                array(
                      array(
                            "|^([\'\"])\s*\.\./.*([\'\"])|si",
                            "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                            "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                            "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                            ),
                      array(
                            "\\1$secremoveimg\\2",
                            "\\1$secremoveimg\\2",
                            "\\1$secremoveimg\\2",
                            "\\1$secremoveimg\\2"
                            )
                      ),
                "/^href|action/i" =>
                array(
                      array(
                            "|^([\'\"])\s*\.\./.*([\'\"])|si",
                            "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                            "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                            "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                            ),
                      array(
                            "\\1#\\2",
                            "\\1#\\2",
                            "\\1#\\2",
                            "\\1#\\2"
                            )
                      ),
                "/^style/si" =>
                array(
                      array(
                            "/expression/si",
                            "/binding/si",
                            "/behaviou*r/si",
                            "|url\(([\'\"])\s*\.\./.*([\'\"])\)|si",
                            "/url\(([\'\"])\s*\S+script\s*:.*([\'\"])\)/si",
                            "/url\(([\'\"])\s*mocha\s*:.*([\'\"])\)/si",
                            "/url\(([\'\"])\s*about\s*:.*([\'\"])\)/si"
                            ),
                      array(
                            "idiocy",
                            "idiocy",
                            "idiocy",
                            "url(\\1#\\2)",
                            "url(\\1#\\2)",
                            "url(\\1#\\2)",
                            "url(\\1#\\2)"
                            )
                      )
                )
          );
      if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
          $view_unsafe_images = false;
      }
      if (!$view_unsafe_images){
          /**
           * Remove any references to http/https if view_unsafe_images set
           * to false.
           */
          $bad_attvals['/.*/']['/^src|background/i'][0][] =
              '/^([\'\"])\s*https*:.*([\'\"])/si';
          $bad_attvals['/.*/']['/^src|background/i'][1][] =
              "\\1$secremoveimg\\2";
          $bad_attvals['/.*/']['/^style/si'][0][] =
              '/url\(([\'\"])\s*https*:.*([\'\"])\)/si';
          $bad_attvals['/.*/']['/^style/si'][1][] =
              "url(\\1$secremoveimg\\2)";
      }

      $add_attr_to_tag = array(
                               "/^a$/si" => array('target'=>'"_new"')
                               );
      $trusted = sq_sanitize($body,
                             $tag_list,
                             $rm_tags_with_content,
                             $self_closing_tags,
                             $force_tag_closing,
                             $rm_attnames,
                             $bad_attvals,
                             $add_attr_to_tag,
                             $message,
                             $id,
                             $mailbox
                             );
      /**
       * The image path is quoted before it goes into the pattern: it
       * holds "." characters and a translated file name, neither of
       * which should be read as regular expression syntax.
       */
      if (preg_match("|" . preg_quote($secremoveimg, '|') . "|si", $trusted)){
          $has_unsafe_images = true;
      }
      return $trusted;
  }
  1532. ?>