I Apache
① By modifying the .htaccess file
-
# Return 403 Forbidden when the User-Agent is empty (^$) or contains one of the
# listed substrings. [NC] makes the match case-insensitive. The pattern is
# quoted because several entries contain spaces.
RewriteEngine On
RewriteCond %{HTTP_USER_AGENT} "(^$|FeedDemon|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|CoolpadWebkit|Java|Feedly|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|FlightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms)" [NC]
RewriteRule ^(.*)$ - [F]
-
# Alternative to the mod_rewrite rule: tag matching requests with the BADBOT
# environment variable (case-insensitive match on the User-Agent header) and
# deny them. The regex is quoted because several entries contain spaces.
# Order/Allow/Deny is Apache 2.2 syntax; on Apache 2.4 it needs mod_access_compat.
SetEnvIfNoCase ^User-Agent$ ".*(FeedDemon|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|CoolpadWebkit|Java|Feedly|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|FlightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms)" BADBOT
Order Allow,Deny
Allow from all
Deny from env=BADBOT
② By modifying the httpd.conf configuration file
-
# Replace /home/wwwroot/xxx with the site's actual document root.
DocumentRoot /home/wwwroot/xxx
<Directory "/home/wwwroot/xxx">
    # Tag requests whose User-Agent contains a listed substring
    # (case-insensitive) with BADBOT, then deny tagged requests.
    SetEnvIfNoCase User-Agent ".*(FeedDemon|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|CoolpadWebkit|Java|Feedly|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|FlightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms)" BADBOT
    # Order/Allow/Deny is Apache 2.2 syntax; on Apache 2.4 it needs mod_access_compat.
    Order Allow,Deny
    Allow from all
    Deny from env=BADBOT
</Directory>
II Nginx code
-
# Block scraping tools such as Scrapy and curl (~* is a case-insensitive match).
if ($http_user_agent ~* (Scrapy|Curl|HttpClient)) {
    return 403;
}
# Block the listed User-Agents and requests with an empty User-Agent (^$).
# Note: ~ is case-sensitive; switch to ~* if case-insensitive matching is wanted.
if ($http_user_agent ~ "FeedDemon|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|CoolpadWebkit|Java|Feedly|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|FlightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms|^$") {
    return 403;
}
# Reject any request method other than GET, HEAD or POST.
if ($request_method !~ ^(GET|HEAD|POST)$) {
    return 403;
}
-
include agent_deny.conf;
-
[marsge@Mars_Server ~]$ cat /usr/local/nginx/conf/zhangge.conf
location / {
        try_files $uri $uri/ /index.php?$args;
        # Add 1 line at this position:
        include agent_deny.conf;
        rewrite ^/sitemap_360_sp.txt$ /sitemap_360_sp.php last;
        rewrite ^/sitemap_baidu_sp.xml$ /sitemap_baidu_sp.php last;
        rewrite ^/sitemap_m.xml$ /sitemap_m.php last;
        # ... (excerpt; the rest of the location block is not shown)
-
/usr/local/nginx/sbin/nginx -s reload
III PHP code
-
// Read the client's User-Agent; a missing header is treated as an empty string
// so that no "undefined index" notice is raised.
$ua = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
// Substrings identifying unwanted clients; matched case-insensitively below.
$now_ua = array('FeedDemon', 'BOT/0.1 (BOT for JCE)', 'CrawlDaddy', 'Java', 'Feedly', 'UniversalFeedParser', 'ApacheBench', 'Swiftbot', 'ZmEu', 'Indy Library', 'oBot', 'jaunty', 'YandexBot', 'AhrefsBot', 'MJ12bot', 'WinHttp', 'EasouSpider', 'HttpClient', 'Microsoft URL Control', 'YYSpider', 'Python-urllib', 'FlightDeckReports Bot');
// Reject an empty User-Agent: mainstream collection programs such as dedecms
// send none, and so do some SQL injection tools.
if (!$ua) {
    header("Content-type: text/html; charset=utf-8");
    die('Do not collect this station, because the collected stationmaster has no small JJ! ');
} else {
    foreach ($now_ua as $value) {
        // stripos() does a literal, case-insensitive substring search. The
        // former eregi() was removed in PHP 7 and interpreted entries such as
        // 'BOT/0.1 (BOT for JCE)' as regular expressions.
        if (stripos($ua, $value) !== false) {
            header("Content-type: text/html; charset=utf-8");
            die('Do not collect this station, because the collected stationmaster has no small JJ! ');
        }
    }
}
4. Testing the result
# Each command sends a HEAD request (-I) with a custom User-Agent (-A) so the
# response status line can be inspected.
# NOTE(review): YisouSpider is not in any of the block lists shown in this
# article, so confirm the expected status against your own list.
curl -I -A 'YisouSpider' zhangge.net
# Empty User-Agent: the rules that include ^$ (and the PHP empty-UA check)
# should reject this request.
curl -I -A '' zhangge.net
# Baiduspider is not in any of the block lists shown, so this request should
# be allowed.
curl -I -A 'Baiduspider' zhangge.net
5. Appendix: UA collection
- FeedDemon: content collection
- BOT/0.1 (BOT for JCE): SQL injection
- CrawlDaddy: SQL injection
- Java: content collection
- Jullo: content collection
- Feedly: content collection
- UniversalFeedParser: content collection
- ApacheBench: CC attack tool
- Swiftbot: useless crawler
- YandexBot: useless crawler
- AhrefsBot: useless crawler
- YisouSpider: useless crawler (acquired by UC Shenma Search; this spider can be allowed through)
- MJ12bot: useless crawler
- ZmEu: phpMyAdmin vulnerability scanning
- WinHttp: content collection, CC attacks
- EasouSpider: useless crawler
- HttpClient: TCP attack
- Microsoft URL Control: scanning
- YYSpider: useless crawler
- jaunty: WordPress brute-force scanner
- oBot: useless crawler
- Python-urllib: content collection
- Indy Library: scanning
- FlightDeckReports Bot: useless crawler
- Linguee Bot: useless crawler
6. References