# domo arigato mr. roboto
#
# Format: one directive per line. A group is one or more User-Agent lines
# followed by its Allow/Disallow rules. Groups are separated by blank lines.

# we like these fine folks
User-Agent: ia_archiver
Allow: /

# since Google keeps hitting 404 urls and won't stop..
# NOTE: do not put blank lines inside this group; some parsers treat a
# blank line as the end of the record and would drop the rules after it.
User-Agent: *
Disallow: /cgi-bin
Disallow: /rss/
Disallow: /mirror/
# a mirror scraping trap will cause a request to this. if a bot honors this file no issue. if not, fail2ban will get them
Disallow: /HelloImScrapingTheMirrorInsteadOfDownloadingOffGitHubBanMeNow
# from http://www.last.fm/robots.txt
Disallow: /harming/humans
Disallow: /ignoring/human/orders
Disallow: /harm/to/self

# much of the following taken from http://pigdog.org/robots.txt.
# http://www.plagiarism.org/crawler/robotinfo.html Fuck off, snitchbot!
User-Agent: SlySearch
Disallow: /

# various lame bots
User-Agent: arianna.iol.it Linux/2.2.17-14smp (linux)
Disallow: /

User-Agent: Webclipping.com
Disallow: /

User-Agent: Casper Bot Search
Disallow: /

User-Agent: dex Bot Search
Disallow: /

User-Agent: Blekkobot
Disallow: /

User-Agent: http://blekko.com/about/blekkobot
Disallow: /

User-Agent: ScoutJet
Disallow: /

User-Agent: TinyBotTestUA
Disallow: /

User-Agent: krzana-rss-bot
Disallow: /

# https://cs.daum.net/faq/15/4118.html?faqId=28966 requesting strange shit that is more than just a crawler, plonk!
User-agent: DAUM
Disallow: /

# People who just set up dork-ass library bots they downloaded
# off the Innurnet, and don't even bother to ID themselves,
# are ASS. Go fuck yourself.
User-Agent: larbin_2.6.2 larbin2.6.2@unspecified.mail
Disallow: /

User-Agent: larbin_2.6.2
Disallow: /

User-Agent: larbin_2.6.1 larbin2.6.2@unspecified.mail
Disallow: /

User-Agent: larbin_2.6.1
Disallow: /

User-Agent: Java1.3.0
Disallow: /

User-Agent: Java1.4.0
Disallow: /

User-Agent: Java
Disallow: /

User-Agent: WinampMPEG/2.00 larbin@unspecified.mail
Disallow: /

User-Agent: WinampMPEG
Disallow: /

User-Agent: MSIE-5.13 larbin@unspecified.mail
Disallow: /

User-Agent: Opera/6.01 larbin2.6.2@unspecified.mail
Disallow: /

User-Agent: Opera/6.01 larbin@unspecified.mail
Disallow: /

User-Agent: Mozilla/5.0 larbin2.6.2@unspecified.mail
Disallow: /

# Welcome to the Innernet. You might want to read the HTTP spec before
# fucking with our Web site. Your user-agent ID is bad, which suggests
# that you don't know shit, and you're a sloppy programmer. Go away.
# http://www.ietf.org/rfc/rfc1945.txt
# http://www.ietf.org/rfc/rfc2068.txt

# this is not email, d00d.
User-Agent: bumblebee@relevare.com
Disallow: /

User-Agent: Zeus 64087 Webster Pro V2.9 Win32
Disallow: /

# whitespace, d00d.
User-Agent: NetResearchServer/2.3(loopimprovements.com/robot.html)
Disallow: /

User-Agent: NetResearchServer
Disallow: /

# ID first, d00d. Then a space, then a comment.
User-Agent: Openfind data gatherer, Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)
Disallow: /

User-Agent: Openbot
Disallow: /

# Gosh, so close.
User-Agent: NetMechanic V3.0
Disallow: /

User-Agent: NetMechanic
Disallow: /

# No spaces. First the ID, then a space, then a comment.
User-Agent: Ariadne RPT-HTTPClient/0.3-3
Disallow: /

User-Agent: Ariadne
Disallow: /

# You suck.
User-Agent: Mozilla/4.0 compatible ZyBorg/1.0 (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)
Disallow: /

User-Agent: ZyBorg
Disallow: /

# slash, not a space
User-Agent: appie 1.1 (www.walhello.com)
Disallow: /

User-Agent: appie
Disallow: /

# bad version string
User-Agent: search.ch V1.4.2 (spiderman@search.ch; http://www.search.ch)
Disallow: /

User-Agent: search.ch
Disallow: /

# No spaces.
User-Agent: PingALink Monitoring Services 1.0 (http://www.pingalink.com)
Disallow: /

User-Agent: PingALink Monitoring Services
Disallow: /

User-Agent: PingALink
Disallow: /

# Other misc blocks
User-Agent: Enterprise_Search/1.0 (http://www.innerprise.net/es-spider.asp)
Disallow: /

User-Agent: BlitzBot
Disallow: /

# spoofing the href to sites that have annoying javascript and other stuff
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .WONKZ)
Disallow: /

# http://www.clockwatchers.com/robots_list.html spammer bots
User-Agent: EmailSiphon
Disallow: /

User-Agent: EmailWolf
Disallow: /

User-Agent: ExtractorPro
Disallow: /

User-Agent: CherryPicker
Disallow: /

User-Agent: NICErsPRO
Disallow: /

User-Agent: Teleport
Disallow: /

User-Agent: EmailCollector
Disallow: /

User-Agent: DirBuster-0.9.7 www.sittinglittleduck.com
Disallow: /

# 2/9/2021
User-Agent: compatible; Adsbot/3.1
Disallow: /

User-Agent: Mozilla/5.0 (compatible; Adsbot/3.1)
Disallow: /

# 2/10/2021
# Each full UA string below is paired with the bot's bare product token.
# Crawlers match on the product token, so the long form alone is unlikely
# to match anything.
User-Agent: Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)
Disallow: /

User-Agent: dotbot
Disallow: /

User-Agent: magpie-crawler/1.1 (robots-txt-checker; +http://www.brandwatch.net)
Disallow: /

User-Agent: magpie-crawler
Disallow: /

User-agent: ltx71 - (http://ltx71.com/)
Disallow: /

User-agent: ltx71
Disallow: /

# for Ilia
# This rule needs its own "User-Agent: *" line. Without one it attaches to
# the preceding ltx71 group, which is already fully disallowed, and so it
# applies to no other crawler. Parsers following RFC 9309 merge every group
# that names the same user-agent, so this extends the site-wide rules.
User-Agent: *
Disallow: /pipermail/attrition/2011-May/000357.html