# robots.txt for http://www.staffinglinks.com/
#
# Format reminder: this file is line-oriented. Every User-agent / Disallow
# directive must sit on its own line, groups are separated by a blank line,
# and every Disallow path is matched from the site root, so it must begin
# with "/". Paths are case-sensitive.
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.
#

# advertising-related bots:

# Wikipedia work bots:
User-agent: IsraBot
Disallow: /

User-agent: Orthogaffe
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

#
# Rules for every other crawler.
#
User-agent: *

# NOTE(review): the sitemap files (/sitemap.xml, /sitemapindex.xml,
# /crcmap.xml, /jsmap.xml, /pseekmap.xml) are disallowed here, which stops
# compliant crawlers from fetching them. Kept as in the original; confirm
# this is intentional.
Disallow: /sitemap.xml

# Directories (duplicate entries for /css/, /images/ and /test/ removed).
Disallow: /crawltrack/
Disallow: /css/
Disallow: /images/
Disallow: /includes/
Disallow: /test/
Disallow: /w3c/
Disallow: /contact/
Disallow: /bin/
Disallow: /cgi-bin/
Disallow: /cgi-local/
Disallow: /etc/
Disallow: /ftp/
Disallow: /logs/
Disallow: /mail/
Disallow: /mivadata/
Disallow: /tmp/
Disallow: /rename.bat
Disallow: /counter/
Disallow: /fms/
Disallow: /import/
Disallow: /inc/
Disallow: /js_associations/
Disallow: /js_ca_companies/
Disallow: /js_career_info/
Disallow: /js_career_links/
Disallow: /js_city/
Disallow: /js_colleges/
Disallow: /js_employers/
Disallow: /js_government/
Disallow: /js_jobbank/
Disallow: /js_magazines/
Disallow: /js_newspapers/
Disallow: /js_other/
Disallow: /js_recservices/
Disallow: /js_rotate/
Disallow: /js_salary/
Disallow: /js_state/
Disallow: /js_test/
Disallow: /misc/
Disallow: /mlist/
Disallow: /pagerank/
Disallow: /paypal/
Disallow: /psearch/
Disallow: /sales/
Disallow: /searchactivity/
Disallow: /stats/
Disallow: /turbo/

# Individual files in the site root. The leading "/" is required; the
# original entries omitted it.
Disallow: /index2.htm
Disallow: /index2.old
Disallow: /index.old
Disallow: /index.shtml
Disallow: /1193.shtm
Disallow: /431.txt
Disallow: /adex1.htm
Disallow: /adex2.htm
Disallow: /adex3.htm
Disallow: /adex4.htm
Disallow: /adex5.htm
Disallow: /adnews.php
Disallow: /atom2rss.php
Disallow: /Career_Advice.php
Disallow: /Career_Advice_Index.php
Disallow: /career_journal_import.php
Disallow: /carp.php
Disallow: /carpsetup.php
Disallow: /class.breadcrumb.inc.php
Disallow: /cntest.php
Disallow: /CN_Job_Search_Results.php
Disallow: /combo1.php
Disallow: /comicalert.html
Disallow: /Contact.php
Disallow: /crcadv.htm
Disallow: /crcass.htm
Disallow: /crccaco.htm
Disallow: /crccareer.htm
Disallow: /crccity.htm
Disallow: /crcco.htm
Disallow: /crccont.htm
Disallow: /crccor.htm
Disallow: /crcedu.htm
Disallow: /crcfaq.htm
Disallow: /crcfch.htm
Disallow: /crcgoogle.php3
Disallow: /crcgooglese.php3
Disallow: /crcgov.htm
Disallow: /crcivw.htm
Disallow: /crcmag.htm
Disallow: /crcmap.htm
Disallow: /crcmap.xml
Disallow: /crcmcs.htm
Disallow: /crcmjs.htm
Disallow: /crcmjs.php
Disallow: /crcmjs.php.old
Disallow: /crcmjs2.htm
Disallow: /crcmjscn.php
Disallow: /crcmjscn.php.old
Disallow: /crcmjsgov.htm
Disallow: /crcmjst.php
Disallow: /crcmjst2.php
Disallow: /crcng.htm
Disallow: /crcnld.htm
Disallow: /crcnp.htm
Disallow: /crcpost.htm
Disallow: /crcpostb.htm
Disallow: /crcpro.htm
Disallow: /crcrch.htm
Disallow: /crcrec.htm
Disallow: /crcrelo.htm
Disallow: /crcrsm.htm
Disallow: /crcsal.htm
Disallow: /crcsb.htm
Disallow: /crcsearch.htm
Disallow: /crcsemap.htm
Disallow: /crcsite.htm
Disallow: /crcstate.htm
Disallow: /crcsupport.htm
Disallow: /crctools.php
Disallow: /crcxml.htm
Disallow: /crcxmlsites.htm
Disallow: /ctr.php
Disallow: /ctr.txt
Disallow: /Employment_Career_News.php
Disallow: /ezcounter.php
Disallow: /ezcounter.txt
Disallow: /footer.js
Disallow: /footer.php
Disallow: /formfb.htm
Disallow: /formurl.htm
Disallow: /head.php
Disallow: /hello.php3
Disallow: /import.txt
Disallow: /jobtest.php
Disallow: /Job_Search_Advice.php
Disallow: /Job_Search_Index.php
Disallow: /Job_Search_Results.php
Disallow: /jsmap.xml
Disallow: /junk.htm
Disallow: /junk.php
Disallow: /Junk2.jpg
Disallow: /junk2.php
Disallow: /junk3.php
Disallow: /junkjms.php
Disallow: /links.txt
Disallow: /link_business.htm
Disallow: /link_career.htm
Disallow: /link_exchange.htm
Disallow: /link_homebus.htm
Disallow: /link_other.htm
Disallow: /link_recruiters.htm
Disallow: /link_resume.htm
Disallow: /link_training.htm
Disallow: /lists.htm
Disallow: /log.txt
Disallow: /member_import.js
Disallow: /nav.php
Disallow: /navinclude.js
Disallow: /nav_import2.js
Disallow: /newmjs.php
Disallow: /newsgroup.htm
Disallow: /newsresults.htm
Disallow: /newstmpl.php
Disallow: /newstmpl2.php
Disallow: /odp2.php
Disallow: /pseekmap.xml
Disallow: /reachrss.tpl
Disallow: /resources_import.js
Disallow: /sample-template.html
Disallow: /sample-template2.html
Disallow: /sitemapindex.xml
Disallow: /sitemapxml.php
Disallow: /sitemap_notes.htm
Disallow: /testjs.htm
Disallow: /testjs.php
Disallow: /work.php
Disallow: /work4.php
Disallow: /youarehere.php