######################## # LAST EDIT: Fri Apr 18 20:49:45 1997 by Axel Boldt (boldt@math.ucsb.edu) ######################## # # Library of NoShit filter scripts # ================================ # # This is a library of filter scripts for the NoShit extension to # Cern's httpd. It tries to filter ads out of popular web sites. Read # more about NoShit at http://math-www.uni-paderborn.de/~axel/NoShit/ # # To use this library, put it in your directory ~/.webfilter and start # WebFilter like this: # webfilter -r ~/.webfilter/library.txt -r ~/.webfilter/conf # where ~/.webfilter/conf is the WebFilter configuration file. # # The newest version of this file is always available from # http://math-www.uni-paderborn.de/~axel/NoShit/library.txt # # For comments, corrections or additions, check out the interactive # NoShit response page at # http://emile.math.ucsb.edu:8000/HyperNews/get/comments/noshit.html # # The sed scripts are by me (Axel Boldt) and badly outdated by # now. Brad Threatt was kind enough to # write some up-to-date perl5 scripts. He is also willing to clean up # other sites. Let him know if you have specific requests. # # As you see, I'm a sed kind of guy, but feel free to submit filters # in any language you like. It's a great way to learn all about # regular expressions! # # I know it sucks, but when submitting additions to the filter script # library, please make sure that they run on the many brain-dead # non-GNU implementations out there. SUN for example is not even able # to program a decent sed. 
# NOTE(review): the rules below look extraction-damaged -- several perl
# substitutions have an empty pattern (s###i) or begin mid-tag
# (e.g. s#]*?/cgi-bin/redirect...), which suggests the HTML tag literals
# were stripped from the patterns. TODO confirm against the upstream
# library.txt before relying on any of these filters.
# NOTE(review): the Four11 rule closes one more brace than it opens, and the
# CNN rule tests defined(p) without the $ sigil -- verify both.
# ######################### # We want to use full featured extended regular expressions in our # patterns: RegExp On ######################### # Yahoo, a listing of web sites, http://www.yahoo.com # Filter http://.*yahoo\.com(.*/|.*\.html|/bin/search.*) \ exec perl -pe 's#^.*AdID=.*##;' ######################### # AltaVista, a search engine, http://altavista.digital.com # # This first rule drops everything past the form: If you like their little # software updates, take this out. Filter http://.*altavista\.digital\.com(.*/) \ exec perl -pe 's#^.*## if ($p == 1); \ $p = 1 if (m##i); \ s#
.*?ad\.doubleclick\.net.*?
##i; ' # Drop the self-promotional stuff, in addition to the ads. Filter http://.*altavista\.digital\.com(.*\.html|/cgi-bin/query.*) \ exec perl -pe 's#
.*?ad\.doubleclick\.net.*?
##i; \ s#.*Overwhelmed by \d+ documents\?.*##i; \ s#]*?/cgi-bin/redirect.*?##i; \ s#(]+)width=\d+%?([^>]+>)#$1$2$3#gi;' ######################### # CitySearch, http://www.citysearch.com # Filter http://(.*\.)?citysearch.*\.com(.*/|.*\.html) \ exec perl -pe 's#^.*try\-it\.cgi.*##;' ######################### # Bigbook, http://www.bigbook.com # Filter http://(.*\.)?bigbook\.com(.*/|.*\.html) \ exec perl -pe '$d=1 if (m#put_ad#); \ $d=0 if (m#
#); \ s#^.*## if ($d); \ s#width=\"?\d+%?\"?##i; ' ######################### # Microsoft's Slate, http://www.slate.com # Filter http://www\.slate\.com(.*/|.*\.html|.*\.asp) \ exec perl -pe 's###i; \ s#Set-Cookie:.*[\r\n]?##; ' ######################### # Suck, http://www.suck.com and http://www.netmoguls.com # # Added code to take out ads (and ad frames), but also to reformat it out # of the tres-annoying 640x480 mode that lame web designers love so much. # Filter http://(.*\.)?suck\.com(.*/|.*\.html) \ exec perl -pe 's#^.*##i; \ s###i; \ s###i; \ s#Set-Cookie:.*[\r\n]?##; \ s#\snowrap\s# #i; ' Filter http://(.*\.)?netmoguls\.com(.*/|.*\.html) \ exec perl -pe 's###i;' ######################### # Four11, a directory service, http://www.four11.com # Filter http://(.*\.)?four11\.com(.*/|.*/cgi-bin/.*|.*\.html) \ exec perl -pe 's###i; \ if (m##) { s#^.*?##i; $d = 0; } \ else { s#^.*##; } } ' ######################### # Lycos, an index of web pages, http://www.lycos.com # Filter http://(.*\.)?lycos\.com(.*/|.*/cgi-bin/.*|.*\.html) \ exec perl -pe 's###gi; \ s#(]+)width=\d+%?([^>]+>)#$1$2$3#gi;' ######################### # New York Times, http://www.nytimes.com # Filter http://(.*\.)?nytimes\.com(.*/|.*\.html) \ exec perl -pe 's#^.*]*ads/.*##i; ' ######################### # Price Watch, a hardware street price monitor, http://www.pricewatch.com # Filter http://(.*\.)?pricewatch\.com(.*/|.*\.htm|.*\.html) \ exec perl -pe 's#]*ad\.doubleclick.*##i; \ s#]*ad\.doubleclick.*?##i; ' ######################### # CNN, a news service, http://www.cnn.com # Filter http://(.*\.)?cnn\.com(.*/|.*\.html) \ exec perl -pe '$p = 0 if !defined(p); \ $p = 1 - $p if (m#^