Sfoglia il codice sorgente

* Improved speed of URL parser
* Made useful regexps global so other plugins (like abook_take) could use them
* Made host regexp match any TLD because of what New.net and ICANN are
squabbling about -- better to be safe and match a little too much than
to miss a valid host.

Tyler Akins 24 anni fa
parent
commit
bad801a8a1
1 ha cambiato i file con 41 aggiunte e 37 eliminazioni
  1. 41 37
      functions/url_parser.php

+ 41 - 37
functions/url_parser.php

@@ -4,48 +4,48 @@
 
 
    $url_parser_php = true;
    $url_parser_php = true;
 
 
-   function replaceBlock ($in, $replace, $start, $end) {
+   function replaceBlock (&$in, $replace, $start, $end) {
       $begin = substr($in,0,$start);
       $begin = substr($in,0,$start);
       $end   = substr($in,$end,strlen($in)-$end);
       $end   = substr($in,$end,strlen($in)-$end);
-      $ret   = $begin.$replace.$end;
-      return $ret;
+      $in    = $begin.$replace.$end;
    }
    }
 
 
+   // Having this defined in just one spot could help when changes need
+   // to be made to the pattern
+   // Make sure that the expression is evaluated case insensitively
+   // 
+   // Here's pretty sophisticated IP matching:
+   // $IPMatch = '(2[0-5][0-9]|1?[0-9]{1,2})';
+   // $IPMatch = '\[?' . $IPMatch . '(\.' . $IPMatch . '){3}\]?';
+   //
+   // Here's enough:
+   $IP_RegExp_Match = '\\[?[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]?';
+   $Host_RegExp_Match = '(' . $IP_RegExp_Match . 
+       '|[0-9a-z]([-.]?[0-9a-z])*\\.[a-z][a-z]+)';
+   $Email_RegExp_Match = '[0-9a-z]([-_.]?[0-9a-z])*(%' . $Host_RegExp_Match . 
+       ')?@' . $Host_RegExp_Match;
+      
    function parseEmail (&$body) {
    function parseEmail (&$body) {
-      global $color;
+      global $color, $Email_RegExp_Match;
       $Size = strlen($body);
       $Size = strlen($body);
       
       
-      // Having this defined in just one spot could help when changes need
-      // to be made to the pattern
-      // Make sure that the expression is evaluated case insensitively
-      // 
-      // Here's pretty sophisticated IP matching:
-      // $IPMatch = '(2[0-5][0-9]|1?[0-9]{1,2})';
-      // $IPMatch = '\[?' . $IPMatch . '(\.' . $IPMatch . '){3}\]?';
-      //
-      // Here's enough:
-      $IPMatch = '\\[?[0-9]{1,3}(\\.[0-9]{1,3}){3}\\]?';
-      $Host = '(' . $IPMatch .
-'|[0-9a-z]([-.]?[0-9a-z])*\\.[a-wyz][a-z](fo|g|l|m|me|o|op|pa|ro|seum|t|u|v|z)?)';
-      $Expression = '[0-9a-z]([-_.]?[0-9a-z])*(%' . $Host . ')?@' . $Host;
-      
       /*
       /*
         This is here in case we ever decide to use highlighting of searched
         This is here in case we ever decide to use highlighting of searched
         text.  this does it for email addresses
         text.  this does it for email addresses
         
         
       if ($what && ($where == "BODY" || $where == "TEXT")) {
       if ($what && ($where == "BODY" || $where == "TEXT")) {
-         eregi ($Expression, $body, $regs);
+         eregi ($Email_RegExp_Match, $body, $regs);
          $oldaddr = $regs[0];
          $oldaddr = $regs[0];
          if ($oldaddr) {
          if ($oldaddr) {
             $newaddr = eregi_replace ($what, "<b><font color=\"$color[2]\">$what</font></font></b>", $oldaddr);
             $newaddr = eregi_replace ($what, "<b><font color=\"$color[2]\">$what</font></font></b>", $oldaddr);
             $body = str_replace ($oldaddr, "<a href=\"../src/compose.php?send_to=$oldaddr\">$newaddr</a>", $body); 
             $body = str_replace ($oldaddr, "<a href=\"../src/compose.php?send_to=$oldaddr\">$newaddr</a>", $body); 
          }
          }
       } else { 
       } else { 
-         $body = eregi_replace ($Expression, "<a href=\"../src/compose.php?send_to=\\0\">\\0</a>", $body);
+         $body = eregi_replace ($Email_RegExp_Match, "<a href=\"../src/compose.php?send_to=\\0\">\\0</a>", $body);
       }
       }
       */
       */
       
       
-      $body = eregi_replace ($Expression, "<a href=\"../src/compose.php?send_to=\\0\">\\0</a>", $body); 
+      $body = eregi_replace ($Email_RegExp_Match, "<a href=\"../src/compose.php?send_to=\\0\">\\0</a>", $body); 
       
       
       // If there are any changes, it'll just get bigger.
       // If there are any changes, it'll just get bigger.
       if ($Size != strlen($body))
       if ($Size != strlen($body))
@@ -54,20 +54,24 @@
    }
    }
 
 
 
 
-   function parseUrl (&$body)
-   {
-      $url_tokens = array(
-         'http://',
-         'https://',
-         'ftp://',
-         'telnet:',  // Special case -- doesn't need the slashes
-         'gopher://',
-         'news://');
+   // We don't want to re-initialize this stuff for every line.  Save work
+   // and just do it once here.
+   $url_parser_url_tokens = array(
+       'http://',
+       'https://',
+       'ftp://',
+       'telnet:',  // Special case -- doesn't need the slashes
+       'gopher://',
+       'news://');
 
 
-      $poss_ends = array(' ', "\n", "\r", '<', '>', ".\r", ".\n", '.&nbsp;', 
-         '&nbsp;', ')', '(', '&quot;', '&lt;', '&gt;', '.<', ']', '[', '{', 
-         '}', "\240");
+   $url_parser_poss_ends = array(' ', "\n", "\r", '<', '>', ".\r", ".\n", 
+       '.&nbsp;', '&nbsp;', ')', '(', '&quot;', '&lt;', '&gt;', '.<', 
+       ']', '[', '{', '}', "\240");
 
 
+
+   function parseUrl (&$body)
+   {
+      global $url_parser_poss_ends, $url_parser_url_tokens;;
       $start = 0;
       $start = 0;
       $target_pos = strlen($body);
       $target_pos = strlen($body);
       
       
@@ -76,7 +80,7 @@
         $target_token = '';
         $target_token = '';
         
         
         // Find the first token to replace
         // Find the first token to replace
-        foreach ($url_tokens as $the_token)
+        foreach ($url_parser_url_tokens as $the_token)
         {
         {
           $pos = strpos(strtolower($body), $the_token, $start);
           $pos = strpos(strtolower($body), $the_token, $start);
           if (is_int($pos) && $pos < $target_pos)
           if (is_int($pos) && $pos < $target_pos)
@@ -91,7 +95,7 @@
        
        
         if (parseEmail($check_str))
         if (parseEmail($check_str))
         {
         {
-          $body = replaceBlock($body, $check_str, $start, $target_pos);
+          replaceBlock($body, $check_str, $start, $target_pos);
           $target_pos = strlen($check_str) + $start;
           $target_pos = strlen($check_str) + $start;
         }
         }
 
 
@@ -100,7 +104,7 @@
         {
         {
           // Find the end of the URL
           // Find the end of the URL
           $end=strlen($body); 
           $end=strlen($body); 
-          foreach ($poss_ends as $key => $val)
+          foreach ($url_parser_poss_ends as $key => $val)
           {
           {
             $enda = strpos($body,$val,$target_pos);
             $enda = strpos($body,$val,$target_pos);
             if (is_int($enda) && $enda < $end) 
             if (is_int($enda) && $enda < $end) 
@@ -114,7 +118,7 @@
           if ($url != '' && $url != $target_token) 
           if ($url != '' && $url != $target_token) 
           {
           {
             $url_str = "<a href=\"$url\" target=\"_blank\">$url</a>";
             $url_str = "<a href=\"$url\" target=\"_blank\">$url</a>";
-            $body = replaceBlock($body,$url_str,$target_pos,$end);
+            replaceBlock($body,$url_str,$target_pos,$end);
             $target_pos += strlen($url_str);
             $target_pos += strlen($url_str);
           } 
           } 
           else 
           else