So, I was actually looking at this script today and thought folks who use Bluecoat as proxies at their jobs (I get the impression that they are pretty popular) might be interested in checking it out. It’s kind of like a poor-man’s pcap solution for sites that use a robust Bluecoat proxy but don’t have pcap instrumentation everywhere.
If you give this script a URI, a list of Bluecoat proxies, and some credentials for those proxies, it essentially goes and grabs the URI, writes it to disk, and includes some information on the last time it was modified on disk, etc. Sometimes, you can use this to retrieve a malicious payload that is otherwise unavailable to you due to take-down by law enforcement (LE) or replay-filtering by the adversary.
Print usage with --help, make sure you define your setup variables appropriately before you run it, and I hope you find it useful.
#!/usr/bin/env python
# creds: I wrote most of this, only thing I used for inspiration was this HTML table parser article: http://simbot.wordpress.com/2006/05/17/html-table-parser-using-python/
# though honestly, his parser is much more feature-rich, his code taught me how the HTMLParser class works
# email me at mishley at-sign gmail dot com for cake and/or questions
import sys
import os
import urllib
from HTMLParser import HTMLParser
import optparse
import re
import time
# ---- setup variables: edit these to match your environment ----

# Proxies queried when no -p/--proxyip option is given on the command line.
default_proxies = [
    "192.168.1.2",
    "192.168.1.3",
]

# Port of the Bluecoat proxy's web admin interface.
bluecoat_web_port = "3443"

# Credentials for the web admin interface above.
bluecoat_web_user = "username"
bluecoat_web_pass = "password"

# Port used to ask a proxy to directly proxy a request (80 may also work).
bluecoat_proxy_port = "3128"
# parse command line args
parser = optparse.OptionParser()
parser.add_option(
    "-u", "--uri", type="string", action="store", dest="uri",
    help="URI to retrieve. Must be a file object, not a directory.")
parser.add_option(
    "-p", "--proxyip", type="string", action="append", dest="proxyip",
    help="Proxy IP addresses to search (defaults to all Bluecoats), can be used multiple times for multiple IP addresses. (if used more than once, --all is assumed)")
parser.add_option(
    "-l", "--log", dest="log", action="store_true", default=False,
    help="Write file object metadata to log file, <filename>.log.")
parser.add_option(
    "-a", "--all", dest="all", action="store_true", default=False,
    help="Grab a copy of the file from every proxy on which it is found, not just the first in the list. These files may be identical, use md5sum to check.")
options, args = parser.parse_args()


def _is_valid_ipv4(address):
    """Return True if address is a dotted-quad IPv4 address (four octets, each 0-255)."""
    octets = address.split('.')
    if len(octets) != 4:
        return False
    for octet in octets:
        # One to three ASCII digits only: rejects empty parts, signs and spaces.
        if not re.match(r'[0-9]{1,3}\Z', octet) or int(octet) > 255:
            return False
    return True


# input validation
if len(sys.argv) == 1:
    # Invoked with no arguments at all: show usage instead of an error.
    parser.print_help()
    sys.exit()
if options.proxyip and len(options.proxyip) > 1:
    # Naming several proxies explicitly implies the caller wants a copy from each.
    options.all = True
if not options.proxyip:
    options.proxyip = default_proxies
else:
    for candidate in options.proxyip:
        # A character-class check alone would let "....", "1.2.3" or
        # "999.1.1.1" through, so the address structure is checked as well.
        if not _is_valid_ipv4(candidate):
            parser.error("Option --proxyip must use a valid IP address, exiting.")
if not options.uri:
    parser.error("Option --uri is required for use, exiting.")
class proxyopen(urllib.FancyURLopener):
    """URL opener that answers HTTP Basic auth challenges with the configured credentials.

    Overrides the interactive credential prompt and the 401 handler so that
    requests to the proxy's web interface authenticate non-interactively and
    give up after a bounded number of attempts.
    """

    def prompt_user_passwd(self, host, realm):
        """Return the (user, password) pair from the setup variables; host and realm are ignored."""
        return bluecoat_web_user, bluecoat_web_pass

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required. This function supports Basic authentication only.

        Retries the request with Basic credentials. Once self.maxtries
        attempts have been made, the failure is reported through
        self.http_error_default with a 500 status. Challenges that are
        missing, unparsable, or not Basic are handed to
        urllib.URLopener.http_error_default.
        """
        # self.tries / self.maxtries are not defined in this class; they are
        # expected to be provided by the base class.
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            self.tries = 0
            return self.http_error_default(url, fp, 500, "HTTPS Basic Auth timed out after "+str(self.maxtries)+" attempts.", headers)
        # NOTE: the base handler must be reached through the urllib module; a
        # bare `URLopener` is not a name in this file and would raise NameError.
        # The explicit returns keep control from falling through to the code
        # below should the base handler ever return instead of raising.
        if 'www-authenticate' not in headers:
            return urllib.URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            return urllib.URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            return urllib.URLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
        # Dispatch to the retry method matching the connection type, e.g.
        # retry_<type>_basic_auth (presumably supplied by the base class).
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        # The attempt counter is only reset on the request-with-data path.
        self.tries = 0
        return getattr(self, name)(url, realm, data)
def checkURI(uri="http://www.google.com/favicon.ico", proxyip="192.168.1.2"):
    """Fetch the proxy's "CE URL Information" page for uri.

    Parameters:
        uri: the http:// URI to look up.
        proxyip: address of the proxy whose web interface is queried.

    Returns the page contents when the proxy reports information for the
    URI, otherwise one of the sentinel strings "NOCONN_0xDEADBEEF" (request
    failed), "NOAUTH_0xDEADBEEF" (page asks for authentication) or
    "NOTFOUND_0xDEADBEEF" (no information page for the URI).

    Exits the program via sys.exit() when uri is not an http:// URI.
    """
    # Split on the first '//' only, so a path that itself contains '//'
    # (http://host/a//b) no longer breaks the unpacking; a URI without any
    # '//' is rejected the same way as a non-http scheme.
    protocol, separator, domainandpath = uri.partition('//')
    protocol = protocol.rstrip(':')
    if not separator or protocol != 'http':
        sys.exit("Cannot process non-http requests, exiting.")

    info_url = "https://" + proxyip + ":" + bluecoat_web_port + "/CE/Info/" + protocol + "/" + domainandpath
    opener = proxyopen()
    try:
        response = opener.open(info_url)
        try:
            page = response.read()
        finally:
            response.close()
    except Exception:
        # Best effort: any request failure is reported as "no connection".
        # Exception (not a bare except) lets Ctrl-C and sys.exit() through.
        return "NOCONN_0xDEADBEEF"

    if 'Authentication required' in page:
        return "NOAUTH_0xDEADBEEF"
    # A usable page carries the info-page title and lacks the 0x00000007 marker.
    if '0x00000007' not in page and 'CE URL Information' in page:
        return page
    return "NOTFOUND_0xDEADBEEF"
def fdURI(uri="http://www.google.com/favicon.ico", proxyip="192.168.1.2"):
    """Open uri through the proxy at proxyip and return the open file-like object.

    The request is sent via the proxy's bluecoat_proxy_port; the caller is
    responsible for reading from and closing the returned object.
    """
    proxy_url = ''.join(['http://', proxyip, ':', bluecoat_proxy_port])
    return urllib.urlopen(uri, proxies={'http': proxy_url})
class parseTable(HTMLParser):
    """Collect the text found inside table cells of an HTML document.

    After feed()/close(), tabledata holds every text chunk that appeared
    while a <table>, a <tr> and a <td> were all open, in document order.
    """

    # Tags whose open/closed state is mirrored in the in_<tag> attributes.
    _tracked_tags = ('table', 'tr', 'td')

    def __init__(self):
        HTMLParser.__init__(self)
        self.tabledata = []
        for tag in self._tracked_tags:
            setattr(self, 'in_' + tag, 0)

    def handle_starttag(self, tag, attrs):
        """Mark a tracked tag as open."""
        if tag in self._tracked_tags:
            setattr(self, 'in_' + tag, 1)

    def handle_data(self, data):
        """Record data only while inside table, row and cell at once."""
        if not (self.in_table and self.in_tr and self.in_td):
            return
        self.tabledata.append(data)

    def handle_endtag(self, tag):
        """Mark a tracked tag as closed."""
        if tag in self._tracked_tags:
            setattr(self, 'in_' + tag, 0)
if __name__ == "__main__":
    # Driver: for each proxy, check whether it knows the URI, download the
    # object through that proxy, and save it (plus optional metadata log).

    # Sentinel strings checkURI() returns in place of a metadata page.
    failure_sentinels = ("NOCONN_0xDEADBEEF", "NOTFOUND_0xDEADBEEF", "NOAUTH_0xDEADBEEF")

    # Output file names start with the last path component of the URI.
    filename = options.uri.split('/')[-1]

    for proxy in options.proxyip:
        meta = checkURI(options.uri, proxy)
        if meta == "NOCONN_0xDEADBEEF":
            print("Unable to connect to proxy "+proxy+" via urllib to find URL '"+options.uri+"'.")
            continue
        if meta == "NOTFOUND_0xDEADBEEF":
            print("Unable to locate URL '"+options.uri+"' in proxy "+proxy+".")
            continue
        if meta == "NOAUTH_0xDEADBEEF":
            print("Unable to authenticate to proxy "+proxy+".")
            continue

        fd = fdURI(options.uri, proxy)
        try:
            outstring = fd.read()
        finally:
            fd.close()

        # we are going to re-grab meta data now that we've potentially
        # modified the last-cached timestamp
        refreshed = checkURI(options.uri, proxy)
        if refreshed not in failure_sentinels:
            meta = refreshed
        # else: the re-grab failed; keep the metadata page fetched before the
        # download rather than parsing a sentinel string (which yields no
        # table cells and would crash below, losing the downloaded payload).

        tableparser = parseTable()
        tableparser.feed(meta)
        tableparser.close()
        parsed = tableparser.tabledata

        # Cell 9 is expected to carry the timestamp; its 3rd and 4th
        # whitespace-separated fields are parsed as "MM/DD/YYYY HH:MM:SS".
        stamp = ' '.join(parsed[9].split()[2:4])
        lastretrieved = time.strftime("%Y%m%d_%H:%M:%S_UTC", time.strptime(stamp, "%m/%d/%Y %H:%M:%S"))
        fullname = filename+"_"+proxy+"_"+lastretrieved

        outfile = open(fullname, 'wb')
        try:
            outfile.write(outstring)
        finally:
            outfile.close()
        print("Downloaded file '"+fullname+"' successfully.")

        if options.log:
            logfile = open(fullname+".log", 'wb')
            try:
                # Cells are written pairwise: "<odd cell> :: <even cell>\n".
                for index, cell in enumerate(parsed):
                    if index % 2:
                        logfile.write(cell+"\n")
                    else:
                        logfile.write(cell+" :: ")
            finally:
                logfile.close()
            print("Successfully wrote metadata to file '"+fullname+".log'.")

        # Without --all, stop after the first proxy that had the object.
        if not options.all:
            break