Webnet Official Crawler

• Sites without keywords won't be displayed

• Sites with HTTPS won't be displayed

• All crawled links are inserted into the database automatically

• Sites without keywords must be fixed manually in the database

array('method'=>"GET", 'headers'=>"User-Agent: howBot/0.1\n")); $context = stream_context_create($options); $doc = new DOMDocument(); @$doc->loadHTML(@file_get_contents($s_link, false, $context)); $s_title = $doc->getElementsByTagName("title"); $s_title = $s_title->item(0)->nodeValue; $s_des = ""; $s_key = ""; $metas = $doc->getElementsByTagName("meta"); for ($i = 0; $i < $metas->length; $i++) { $meta = $metas->item($i); if (strtolower($meta->getAttribute("name")) == "description") $s_des = $meta->getAttribute("content"); if (strtolower($meta->getAttribute("name")) == "keywords") $s_key = $meta->getAttribute("content"); } echo $s_title ;?>






array('method'=>"GET", 'headers'=>"User-Agent: howBot/0.1\n")); $context = stream_context_create($options); $doc = new DOMDocument(); @$doc->loadHTML(@file_get_contents($url, false, $context)); $linklist = $doc->getElementsByTagName("a"); foreach ($linklist as $link) { $l = $link->getAttribute("href"); if (substr($l, 0, 1) == "/" && substr($l, 0, 2) != "//") { $l = parse_url($url)["scheme"]."://".parse_url($url)["host"].$l; } else if (substr($l, 0, 2) == "//") { $l = parse_url($url)["scheme"].":".$l; } else if (substr($l, 0, 2) == "./") { $l = parse_url($url)["scheme"]."://".parse_url($url)["host"].dirname(parse_url($url)["path"]).substr($l, 1); } else if (substr($l, 0, 1) == "#") { $l = parse_url($url)["scheme"]."://".parse_url($url)["host"].parse_url($url)["path"].$l; } else if (substr($l, 0, 3) == "../") { $l = parse_url($url)["scheme"]."://".parse_url($url)["host"]."/".$l; } else if (substr($l, 0, 11) == "javascript:") { continue; } else if (substr($l, 0, 5) != "https" && substr($l, 0, 4) != "http") { $l = parse_url($url)["scheme"]."://".parse_url($url)["host"]."/".$l; } if (!in_array($l, $already_crawled)) { $already_crawled[] = $l; $crawling[] = $l; echo get_details($l)."\n"; } } array_shift($crawling); foreach ($crawling as $site) { follow_links($site); } } follow_links($start); ?>