# Block unwanted crawlers / scrapers by User-Agent using mod_rewrite.
# Every request whose User-Agent matches one of the conditions below is
# answered with 403 Forbidden, and the custom page /403.html is served.
ErrorDocument 403 /403.html
RewriteEngine On
RewriteBase /
# Let the error page itself through. Without this exemption the internal
# request for /403.html is matched by the blocking rule as well, and Apache
# reports a second 403 "while trying to use an ErrorDocument" instead of
# showing the custom page. The optional leading slash lets the rule work in
# both per-directory (.htaccess) and server/virtual-host context.
RewriteRule ^/?403\.html$ - [L]
# IF THE UA STARTS WITH THESE
# All conditions are OR-ed together and compared case-insensitively (NC).
# NOTE(review): very short prefixes such as "da" also match unrelated agents
# that merely begin with those letters (e.g. "Dalvik/...") - confirm intended.
RewriteCond %{HTTP_USER_AGENT} ^(aesop_com_spiderman|alexibot|backweb|bandit|batchftp|bigfoot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(black.?hole|blackwidow|blowfish|botalot|buddy|builtbottough|bullseye) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(cheesebot|cherrypicker|chinaclaw|collector|copier|copyrightcheck) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(cosmos|crescent|curl|custo|da|diibot|disco|dittospyder|dragonfly) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(drip|easydl|ebingbong|ecatch|eirgrabber|emailcollector|emailsiphon) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(emailwolf|erocrawler|exabot|eyenetie|filehound|flashget|flunky) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(frontpage|getright|getweb|go.?zilla|go-ahead-got-it|gotit|grabnet) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(grafula|harvest|hloader|hmview|httplib|httrack|humanlinks|ilsebot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(infonavirobot|infotekies|intelliseek|interget|iria|jennybot|jetcar) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(joc|justview|jyxobot|kenjin|keyword|larbin|leechftp|lexibot|lftp|libweb) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(likse|linkscan|linkwalker|lnspiderguy|lwp|magnet|mag-net|markwatch) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(mata.?hari|memo|microsoft.?url|midown.?tool|miixpc|mirror|missigua) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(mister.?pix|moget|mozilla.?newt|nameprotect|navroad|backdoorbot|nearsite) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(net.?vampire|netants|netcraft|netmechanic|netspider|nextgensearchbot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(attach|nicerspro|nimblecrawler|npbot|octopus|offline.?explorer) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(offline.?navigator|openfind|outfoxbot|pagegrabber|papa|pavuk) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(pcbrowser|php.?version.?tracker|pockey|propowerbot|prowebwalker) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(psbot|pump|queryn|recorder|realdownload|reaper|reget|true_robot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(repomonkey|rma|internetseer|sitesnagger|siphon|slysearch|smartdownload) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(snake|snapbot|snoopy|sogou|spacebison|spankbot|spanner|sqworm|superbot) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(superhttp|surfbot|asterias|suzuran|szukacz|takeout|teleport) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(telesoft|the.?intraformant|thenomad|tighttwatbot|titan|urldispatcher) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(turingos|turnitinbot|urly.?warning|vacuum|vci|voideye|whacker) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^(libwww-perl|widow|wisenutbot|wwwoffle|xaldon|xenu|zeus|zyborg|anonymouse) [NC,OR]
# STARTS WITH WEB
RewriteCond %{HTTP_USER_AGENT} ^web(zip|emaile|enhancer|fetch|go.?is|auto|bandit|clip|copier|master|reaper|sauger|site.?quester|whack) [NC,OR]
# ANYWHERE IN UA
# An unanchored pattern already matches at any position, so no leading
# "^.*" or trailing ".*$" is needed. This is the last condition of the
# OR chain and therefore carries no OR flag.
RewriteCond %{HTTP_USER_AGENT} (craftbot|download|extract|stripper|sucker|ninja|clshttp|webspider|leacher|collector|grabber|webpictures) [NC]
# ISSUE 403 / SERVE ERRORDOCUMENT
# "^" matches every request path, including the empty per-directory path of
# a request for "/" (which "." does not match). F already implies L.
RewriteRule ^ - [F]
Alternate RewriteCond Rules
RewriteEngine on
# Block spambots
# One case-insensitive (NC) pattern, matched anywhere in the User-Agent
# header; a match is answered with 403 Forbidden.
# The pattern is split over several physical lines with a trailing
# backslash. Continuation lines must start in column 0, and no comment or
# blank line may appear inside the pattern, otherwise stray whitespace ends
# up inside the regular expression.
# "\s" stands for a single whitespace character inside a bot name.
# Changes against the earlier version of this list: names that had lost the
# backslash of "\s" (Download Demon, Express WebPictures, Image Stripper,
# Image Sucker, Indy Library) and the misspelt PageGrabber now match the
# real agents; duplicated alternatives and alternatives already covered by
# another one were removed.
RewriteCond %{HTTP:User-Agent} (?:Alexibot|Art-Online|asterias|BackDoorbot|Black.Hole|\
BlackWidow|BlowFish|botALot|BuiltbotTough|Bullseye|BunnySlippers|Cegbfeieh|Cheesebot|\
CherryPicker|ChinaClaw|CopyRightCheck|cosmos|Crescent|Custo|DISCo|DittoSpyder|Download\sDemon|\
eCatch|EirGrabber|EmailCollector|EmailSiphon|EmailWolf|EroCrawler|Express\sWebPictures|ExtractorPro|\
EyeNetIE|FlashGet|Foobot|FrontPage|GetRight|GetWeb!|Go-Ahead-Got-It|Go!Zilla|GrabNet|Grafula|\
Harvest|hloader|HMView|httplib|HTTrack|humanlinks|Image\sStripper|Image\sSucker|Indy\sLibrary|\
InfonaviRobot|InterGET|Internet\sNinja|Jennybot|JetCar|JOC\sWeb\sSpider|Kenjin.Spider|Keyword.Density|\
larbin|LeechFTP|Lexibot|libWeb/clsHTTP|LinkextractorPro|LinkScan/8.1a.Unix|LinkWalker|lwp-trivial|\
Mass\sDownloader|Mata.Hari|Microsoft.URL|MIDown\stool|MIIxpc|Mister.PiX|moget|\
Mozilla/3.Mozilla/2.01|Mozilla.*NEWT|Navroad|NearSite|NetAnts|NetMechanic|NetSpider|Net\sVampire|\
NetZIP|NICErsPRO|NPbot|Octopus|Offline.Explorer|Offline\sNavigator|Openfind|\
PageGrabber|Papa\sFoto|pavuk|pcBrowser|Program\sShareware\s1|ProPowerbot/2.14|ProWebWalker|\
psbot/0.1|QueryN.Metasearch|ReGet|RepoMonkey|RMA|SiteSnagger|SlySearch|SmartDownload|Spankbot|spanner|\
Superbot|SuperHTTP|Surfbot|suzuran|Szukacz/1.4|tAkeOut|Teleport|Telesoft|The.Intraformant|\
TheNomad|TightTwatbot|Titan|toCrawl/UrlDispatcher|True_Robot|turingos|\
Turnitinbot/1.5|URLy.Warning|VCI|VoidEYE|WebAuto|WebBandit|WebCopier|WebEMailExtrac|WebEnhancer|\
WebFetch|WebGo\sIS|Web.Image.Collector|WebLeacher|WebmasterWorldForumbot|\
WebReaper|WebSauger|Website\seXtractor|Website.Quester|Webster.Pro|WebStripper|\
Web\sSucker|WebWhacker|WebZip|Wget|Widow|WWW-Collector-E|WWWOFFLE|\
Xaldon\sWebSpider|Xenu's|Zeus) [NC]
# ".?" matches every request path, including the empty one; F returns 403.
RewriteRule .? - [F]
Block Bad Bots with SetEnvIfNoCase
# Block unwanted crawlers / scrapers by User-Agent using mod_setenvif.
# Each SetEnvIfNoCase line compares the User-Agent header, ignoring case,
# with a pattern and sets the environment variable HTTP_SAFE_BADBOT on a
# match. Access is then refused to every request carrying that variable,
# and the custom page /403.html is served with the 403 response.
ErrorDocument 403 /403.html
# IF THE UA STARTS WITH THESE
# The patterns are anchored with "^" so that they really test the start of
# the header. With a leading ".*" short names such as "da", "memo", "papa",
# "rma" or "vci" would match anywhere inside an unrelated User-Agent.
# NOTE(review): a prefix as short as "da" still matches agents that merely
# begin with those letters (e.g. "Dalvik/...") - confirm this is intended.
SetEnvIfNoCase User-Agent ^(aesop_com_spiderman|alexibot|backweb|bandit|batchftp|bigfoot) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(black.?hole|blackwidow|blowfish|botalot|buddy|builtbottough|bullseye) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(cheesebot|cherrypicker|chinaclaw|collector|copier|copyrightcheck) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(cosmos|crescent|curl|custo|da|diibot|disco|dittospyder|dragonfly) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(drip|easydl|ebingbong|ecatch|eirgrabber|emailcollector|emailsiphon) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(emailwolf|erocrawler|exabot|eyenetie|filehound|flashget|flunky) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(frontpage|getright|getweb|go.?zilla|go-ahead-got-it|gotit|grabnet) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(grafula|harvest|hloader|hmview|httplib|httrack|humanlinks|ilsebot) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(infonavirobot|infotekies|intelliseek|interget|iria|jennybot|jetcar) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(joc|justview|jyxobot|kenjin|keyword|larbin|leechftp|lexibot|lftp|libweb) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(likse|linkscan|linkwalker|lnspiderguy|lwp|magnet|mag-net|markwatch) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(mata.?hari|memo|microsoft.?url|midown.?tool|miixpc|mirror|missigua) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(mister.?pix|moget|mozilla.?newt|nameprotect|navroad|backdoorbot|nearsite) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(net.?vampire|netants|netcraft|netmechanic|netspider|nextgensearchbot) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(attach|nicerspro|nimblecrawler|npbot|octopus|offline.?explorer) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(offline.?navigator|openfind|outfoxbot|pagegrabber|papa|pavuk) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(pcbrowser|php.?version.?tracker|pockey|propowerbot|prowebwalker) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(psbot|pump|queryn|recorder|realdownload|reaper|reget|true_robot) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(repomonkey|rma|internetseer|sitesnagger|siphon|slysearch|smartdownload) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(snake|snapbot|snoopy|sogou|spacebison|spankbot|spanner|sqworm|superbot) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(superhttp|surfbot|asterias|suzuran|szukacz|takeout|teleport) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(telesoft|the.?intraformant|thenomad|tighttwatbot|titan|urldispatcher) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(turingos|turnitinbot|urly.?warning|vacuum|vci|voideye|whacker) HTTP_SAFE_BADBOT
SetEnvIfNoCase User-Agent ^(libwww-perl|widow|wisenutbot|wwwoffle|xaldon|xenu|zeus|zyborg|anonymouse) HTTP_SAFE_BADBOT
# STARTS WITH WEB
SetEnvIfNoCase User-Agent ^web(zip|emaile|enhancer|fetch|go.?is|auto|bandit|clip|copier|master|reaper|sauger|site.?quester|whack) HTTP_SAFE_BADBOT
# ANYWHERE IN UA
# Deliberately unanchored: these words are refused wherever they occur.
SetEnvIfNoCase User-Agent (craftbot|download|extract|stripper|sucker|ninja|clshttp|webspider|leacher|collector|grabber|webpictures) HTTP_SAFE_BADBOT
# REFUSE FLAGGED REQUESTS
# Apache 2.4 and later (mod_authz_core): everyone is admitted except
# requests flagged above. The error page is exempted, because otherwise the
# internal request for /403.html would be refused as well and the custom
# page could never be shown.
<IfModule mod_authz_core.c>
    <RequireAll>
        Require all granted
        Require not env HTTP_SAFE_BADBOT
    </RequireAll>
    <Files "403.html">
        Require all granted
    </Files>
</IfModule>
# Apache 2.2 and earlier: same policy with the older Order/Allow/Deny
# directives.
<IfModule !mod_authz_core.c>
    Order Allow,Deny
    Allow from all
    Deny from env=HTTP_SAFE_BADBOT
    <Files "403.html">
        Order Allow,Deny
        Allow from all
    </Files>
</IfModule>
Original Bad Bot / Web Scraper List
WebBandit, 2icommerce, Accoona, ActiveTouristBot, adressendeutschland, aipbot, Alexibot, Alligator, AllSubmitter, almaden, anarchie, Anonymous, Apexoo, Aqua_Products,
asterias ASSORTATHENSAtHomeAtomzattacheautoemailspiderautohttpb2wbewBackDoorBotBadassBaiduspiderBaiduspider+BecomeBotbertsBitacleBiz360Black.HoleBlackWidowbladder fusionBlog CheckerBlogPeopleBlogshares SpidersBloodhoundBlowFishBoard BotBookmark search toolBotALotBotRightHereBot mailto:craftbot@yahoo.comBropwersBrowsezillaBuiltBotToughBullseye-
BunnySlippers CegbfeiehCFNetworkCheeseBotCherryPickerCrescentcharlotte/-
ChinaClaw ConveraCopernicCopyRightCheckcosmosCrescentc-spidercurlCustoCyberzDataCha0sDaumDewebDiggerDigimarcdigout4uagentDIIbotDISCoDittoSpyderDnloadMageDownloaddragonflyDreamPassportDSurfDTS AgentdumbotDynaWebe-collectorEasyDLEBrowseeCatchecollectoredgeioefp@gmx.netEirGrabberEmail ExtractorEmailCollectorEmailSiphonEmailWolfEmeraldShieldEnterprise_SearchEroCrawlerESurfEvalEverest-VulcanExabotExpressExtractorExtractorProEyeNetIEFairAdfastlwspiderfetchFEZheadFileHoundfindlinksFlaming AttackBotFlashGetFlickBotFoobot-
Forex Franklin LocatorFreshDownloadFrontPageFSurfGaisbotGamespy_Arcade-
genieBot GetBotGetleftGetRightGetWeb!Go!ZillaGo-Ahead-Got-ItGOFORITBOTGrabNetGrafulagrubHarvestHatena AntennaheritrixHLoaderHMViewholmesHooWWWerHouxouCrawlerHTTPGethttplibHTTPRetrieverHTTrackhumanlinksIBM_PlanetwideiCCrawlerichiroiGetterImage StripperImage Suckerimagefetchimds_monitorIncyWincyIndustry ProgramIndyInetURLInfoNaviRobotInstallShield DigitalWizardInterGETIRLbotIron33ISSpiderIUPUI Research BotJakartajava/JBH AgentJennyBotJetCarjeteyejeteyebotJoBoJOC Web SpiderKapereKenjinKeyword DensityKRetrieveksoapKWebGetLapozzBotlarbinleechLeechFTPLeechGetleipzig.deLexiBotlibWeblibwww-FMlibwww-perlLightningDownloadLinkextractorProLinkieLinkScanlinktigerLinkWalkerlmcrawlerLNSpiderguyLocalcomBotlooksmartLWPMac FinderMail Sweepermark.bloninMaSagool-
Mass Mata HariMCspiderMetaProducts Download ExpressMicrosoft Data AccessMicrosoft URL Control-
MIDown MIIxpcMirrorMissaugaMissouri College BrowseMisterMonstermkdbmogetMoreoverbotmothra/netscanMovableTypeMozi!Mozilla/22Mozilla/3.0 (compatible)Mozilla/5.0 (compatible; MSIE 5.0)MSIE_6.0MSIECrawlerMSProxyMVAClientMyFamilyBotMyGetRightnameprotectNASA SearchNaverNavroadNearSiteNetAntsnetattacheNetCartaNetMechanicNetResearchServerNetSpiderNetZIPNet VampireNEWT ActiveXNextopiaNICErsPROninjaNimbleCrawlernoxtrumbotNPBotOctopusOfflineOK MozillaOmniExplorerOpaLOpenbotOpenfindOpenTextSiteCrawlerOracle Ultra SearchOutfoxBotP3PPackRatPageGrabberPagmIEDownloadpanscientPapa FotopavukpcBrowserperlPerManPersonaPilotPHP versionPlantyNet_WebRobotplaystarmusicPluckerPort HuronProgram SharewareProgressive DownloadProPowerBotprospectorProWebWalkerProzillapsbotpsycheclonepufPushSitePussyCatPuxaRapidoPython-urllibQuepasaCreepQueryNRadiationRealDownloadRedCarpetRedKernelReGetrelevantnoise-
RepoMonkey RMARoverRsyncRTG30RufusSAPOSBIderscooterScoutAboutscriptsearchpreviewsearchtermsSeekbotSeriousShaishelobShim-CrawlerSickleBotsitecheckSiteSnaggerSlurpy VerifierSlySearchSmartDownloadsna-snaggerSnoopysogousootleSo-net" bat_botSpankBot" bat_botspanner" bat_botSpeedDownloadSpeglaSphereSphiderSpiderBotsprooseSQ WebscannerSqwormStaminaStanfordstudybotSuperBotSuperHTTPSurfbotSurfWalkersuzuranSzukacztAkeOutTALWinHttpClienttarspiderTeleportTelesoftTempleton-
TestBED The IntraformantTheNomadTightTwatBotTitantoCrawl/UrlDispatcherTrue_RobotturingosTurnitinBotTwisted PageGetterUCmoreUdmSearchUMBCUniversalFeedParserURL ControlURLGetFileURLy WarningURL_Spider_ProUtilMindvayalavobsubVCIVoidEYEVoilaBotvoyagerw3mirWeb Image CollectorWeb SuckerWeb2WAPWebaltBotWebAutoWebBanditWebCapturewebcollageWebCopierWebCopyWebEMailExtracWebEnhancerWebFetchWebFilterWebFountainWebGoWebLeacherWebMinerWebMirrorWebReaperWebSaugerWebSnakeWebsiteWebStripperWebVacwebwalkWebWhackerWebZIPWells SearchWEP Search 00WeRelateBotWgetWhosTalkingWidowWildsoft SurferWinHttpRequestWinHTTrackWUMPUSWWWOFFLEwwwsterWWW-CollectorXaldon-
Xenu's XenusXGETY!TunnelProYahooYSMcmYaDirectBotYetiZadeZBotzerxbotZeusZyBorg
--
Thanks,
B.Chandrashekhar.
No comments:
Post a Comment