[VIM] scraping the Sun web site for alerts

Josh Bressers bressers at redhat.com
Thu Aug 9 11:14:34 UTC 2007


> 
> How are people automatically grabbing Sun alerts now that their page
> puts up a disclaimer?  This URL:
> 
>   http://sunsolve.sun.com/search/advsearch.do?collection=SUNALERT&type=collections&sort=date&queryKey4=%22category:security%22%20%22availability,%20security%22%20category:security&max=100
> 
> which was obtained from this main page:
> 
>   http://sunsolve.sun.com/show.do?target=security/sec
> 
> doesn't list the most recent 6 or 7 alerts, even once you get
> through the disclaimer, which I'd rather not spend the 15 minutes
> figuring out how to automate.  On principle, I shouldn't have to set
> cookies or other strange stuff just to get vuln information from a
> vendor.
> 
> I'm not sure I want to monitor their blog, but
> http://blogs.sun.com/security/category/Alerts seems like it might work
> for now.
> 

Here's the script we're using; it seems to work pretty well.

-- 
    JB

-------------- next part --------------
#!/usr/bin/env python

import urllib2
import sys
import cookielib,urllib,urllib2


# mschoene at redhat.com

# Grab the latest alerts from SunSolve.
#
# Sample advanced-search URL: restricted to the security category
# (queryKey4) and the SUNALERT collection, filtered on
# "JDK and JRE 5.0 Update 12" (queryKey2), sorted by date (sort=date),
# with at most 100 results (max=100).
# NOTE: the __main__ section does not read this value; it takes its URL
# from the file named on the command line.

url = "http://sunsolve.sun.com/search/advsearch.do?queryKey1=&queryKey2=JDK+and+JRE+5.0+Update+12&queryKey3=&queryKey4=%22category%3Asecurity%22+%22availability%2C+security%22+category%3Asecurity&queryKey5=&type=collections&collection=all&collection=SUNALERT&repository=cds&language=en&max=100&updateDate=0&sort=date&Search=Search+%3E%3E"

class my_redirect_handler(urllib2.HTTPRedirectHandler):
	"""Redirect handler that records a cookie instead of following a 302.

	The override of http_error_302 never calls the base implementation, so
	the redirect itself is not followed; the handler only remembers the
	first cookie found in the response headers.  Because the method returns
	None, the opener treats the 302 as unhandled (urllib2 normally raises
	HTTPError in that case), so callers should expect an exception from
	open() and then ask this object for the cookie.
	"""

	# Last cookie seen, as a [name, value] list.  Stays "" until a 302
	# response with a matching Set-Cookie header has been handled.
	cookie = ""

	def returncookie(self):
		"""Return the recorded cookie: [name, value], or "" if none was seen."""
		return self.cookie

	def http_error_302(self, req, fp, code, msg, headers):
		"""Record the cookie from a 302 response's headers.

		Only ``headers`` is used; it is converted with str() and searched
		for a "Set-Cookie: <name>=<value>; Path" header.  When no such
		header is present the previously recorded cookie is left untouched
		(the original code raised AttributeError here).

		Always returns None, i.e. the redirect is not followed.
		"""
		import re
		found = re.search(r"Set-Cookie:\s+(.+?);\ Path", str(headers))
		if found is None:
			# No usable Set-Cookie header: keep whatever was recorded before.
			return None
		# Split on the first "=" only, so a value that itself contains "="
		# stays intact and the result always has exactly two elements.
		(name, _sep, value) = found.group(1).partition("=")
		self.cookie = [name, value]
		return None


def parsesundoc(doc):
	"""Extract alert entries from a SunSolve search-result HTML page.

	``doc`` is the page text.  Each result is expected to look like this
	fragment (a bold link holding the title, followed somewhere later by a
	"Document ID: NNN" table cell):

	<td> 
      <b><a href="/search/document.do?assetkey=1-26-102957-1&searchclause=%22JDK%2420and%2420JRE%24205.0%2420Update%242012%22%2420%22category:security%22%2420%22availability,%2420security%22%2420category:security">
      Security Vulnerability With Java Web Start May Allow Application to Escalate Privileges</b></a>
      <table border="0" cellspacing="0" cellpadding="2">
        <tr> 
          <td nowrap class="smaller">Document ID: 102957</td>
          <td>&nbsp;</td>
          <td nowrap class="smaller">Jun 28, 2007</td>
          <td>&nbsp;</td>
          <td nowrap class="smaller">
          
               PUBLIC
          
          </td>
          <td>&nbsp;</td>                
          <td nowrap class="smaller">Sun Alert Notifications<br></td>
          <td>&nbsp;</td>
        </tr>
        <tr><td  height='8'></td>
      </table>
    </td>

	Returns a list of (href, title, document_id) string tuples, one per
	match, in page order; an empty list when nothing matches.  Titles are
	returned with surrounding whitespace removed.
	"""

	import re
	entry_re = re.compile(r"""<b><a href="(.+?)">\s+?(.+?)\s*?</b></a>.+?>Document ID: (\d+)</td>""", re.S|re.M)
	# The lazy \s+? in the pattern consumes only a single whitespace
	# character, so the raw title group still carries the indentation that
	# follows the newline after the <a> tag; strip it here.
	return [(href, title.strip(), doc_id) for (href, title, doc_id) in entry_re.findall(doc)]
	


def testwithfile():
	l = file("suntestdoc.html","r").read()
	vulns= parsesundoc(l)
	for (url,text,id) in vulns:
		print "%s=>%s" % (id,text)

	sys.exit(0)



if __name__=='__main__':
	f=file(sys.argv[1],"r").read()
	myhandler302 = my_redirect_handler()

	try:
		urlOpener = urllib2.build_opener(myhandler302)
		request = urllib2.Request(f, None)
		request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.7')
		url = urlOpener.open(request)

	except Exception, e:
#		print e
		pass


	(a,b)=myhandler302.returncookie()

	url=None

	try:
		urlOpener = urllib2.build_opener()
#		urlOpener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=0))
		request = urllib2.Request(f, None)
		request.add_header('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.7')
		request.add_header('Cookie',"ComparisonSurvey=true; s_sq=; s_cc=true; SunSolve_SLA=accept=Y; mySunSolveURL=http://sunsolve.sun.com; JSESSIONID=%s" % b)
		url = urlOpener.open(request)

	except Exception, e:
		print e
		pass

	#Cookie=s_sq=; s_cc=true; SunSolve_SLA=accept=Y; mySunSolveURL=http://sunsolve.sun.com; JSESSIONID=49b639551c663833b7378e75cabb

	z = url.read()

	# print url.headers

	# print z
	vulns = parsesundoc(z)

	for (url,text,id) in vulns:
		print "%s=>%s" % (id,text)


-------------- next part --------------
http://sunsolve.sun.com/search/advsearch.do?queryKey1=&queryKey2=JDK+and+JRE+5.0+Update+12&queryKey3=&queryKey4=%22category%3Asecurity%22+%22availability%2C+security%22+category%3Asecurity&queryKey5=&type=collections&collection=all&collection=SUNALERT&repository=cds&language=en&max=100&updateDate=0&sort=date&Search=Search+%3E%3E


More information about the VIM mailing list