This post is clearly out of date; you should now try PyV8 instead: https://code.google.com/p/pyv8/
这个帖子已经明显过时了,你应该试试https://code.google.com/p/pyv8/
-------------------------
以前因为写了这段代码，刷了一下 Google AdSense 的链接，结果 Google 不让我用 AdSense 了。
因为 Google AdSense 中的 JavaScript 代码应该是经过混淆的，当时分析没有找到头绪，就找到了 SpiderMonkey。
这段代码主要是在 Python 中通过 SpiderMonkey 类库动态执行混淆后的 JavaScript，找到真正的链接。
import time
import urllib2
import httplib
import random
import re

from spidermonkey import Runtime

import urllister

# Host that serves the ad page; relative ad links are resolved against it.
_BASE_URL = 'http://pagead2.googlesyndication.com'

# Browser-like User-Agent sent with every request made by this script.
_USER_AGENT = ('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.5) '
               'Gecko/20060731 Ubuntu/dapper-security Firefox/1.5.0.5')

# The ad request URL is PREFIX + <millisecond timestamp> + SUFFIX.  It is
# built by concatenation (not %-formatting) because the suffix itself
# contains literal percent-escapes.
_AD_URL_PREFIX = ('http://pagead2.googlesyndication.com/pagead/ads'
                  '?client=ca-pub-6581680843370427&dt=')
_AD_URL_SUFFIX = ('&lmt=1156579898&format=468x60_as&output=html'
                  '&url=http%3A%2F%2Flocalhost%2Ftest.html'
                  '&color_bg=F6F6F6&color_text=9E5205&color_link=B8A80D'
                  '&color_url=B8A80D&color_border=9E5205&ad_type=text'
                  '&cc=100&u_h=800&u_w=1280&u_ah=779&u_aw=1280&u_cd=24'
                  '&u_tz=480&u_his=1&u_java=true&u_nplug=9&u_nmime=91')

# Matches the span of page script from "function jcc(a)" through the end of
# "function ha(a)" (the first "}" following its call to jcc(a)).
_HANDLER_RE = re.compile(
    r'function jcc\(a\).*function ha\(a\).*?jcc\(a\).*?\}')


def _fetch(url):
    """Request *url* with the browser-like User-Agent and return the body."""
    request = urllib2.Request(url)
    # The header name's odd casing is kept from the original script; HTTP
    # header names are case-insensitive.
    request.add_header('USer-Agent', _USER_AGENT)
    conn = urllib2.build_opener().open(request)
    try:
        return conn.read()
    finally:
        # Close the response even if read() raises.
        conn.close()


def _extract_handler(html):
    """Return the page's jcc/ha script patched to run without a DOM.

    The patched script stores its result in a global named ``phahref``
    instead of writing to ``document.getElementById(a).href``.  Returns
    ``None`` when the expected functions are not found in *html*.
    """
    m = _HANDLER_RE.search(html)
    if m is None:
        return None
    func = m.group(0)
    # No DOM exists inside the bare JS runtime: replace the element lookup
    # with a plain global assignment.
    func = re.sub(r'pha=document\.getElementById\(a\)', 'phahref=a', func)
    print(func)
    func = re.sub(r'pha\.href', 'phahref', func)
    print(func)
    # Make the element-id comparison always true.
    func = re.sub(r"a=='aw0'", 'a == a', func)
    print(func)
    return func


def get():
    """Fetch one ad page, resolve its obfuscated link and request it.

    Steps: sleep 0-9 seconds, download the ad HTML, extract and patch the
    page's ``jcc``/``ha`` JavaScript, evaluate it in SpiderMonkey against
    the first link collected by ``urllister.URLLister``, then request the
    URL the script left in the ``phahref`` global.

    Returns ``None``.  If the page lacks the expected script or contains no
    link, a message is printed and the function returns early (the original
    script died with NameError / IndexError in those cases).

    NOTE: the accompanying post states that running this against AdSense
    got the author's account disabled.
    """
    httplib.HTTPConnection.debuglevel = 1
    dt = str(int(time.time() * 1000))
    print('start sleep...')
    time.sleep(random.randint(0, 9))
    print('%s end sleep...' % dt)

    html = _fetch(_AD_URL_PREFIX + dt + _AD_URL_SUFFIX)
    print(html)

    func = _extract_handler(html)
    if func is None:
        print('jcc/ha functions not found in ad page; giving up')
        return

    parser = urllister.URLLister()
    parser.feed(html)
    parser.close()
    if not parser.href:
        print('no link found in ad page; giving up')
        return
    href = _BASE_URL + parser.href[0]

    javascript = func + ' href="' + href + '"; ha(href); print (href);'
    print(javascript)

    # SECURITY: this evaluates script text downloaded from the network.
    rt = Runtime()
    cx = rt.new_context()
    cx.eval_script(javascript)
    s = cx.get_global("phahref")
    print(s)

    # The response body of the resolved URL is read and discarded.
    _fetch(s)


if __name__ == '__main__':
    import threading
    # NOTE(review): presumably this file is saved as analy.py, so analy.get
    # is the get() defined above -- confirm.
    import analy

    workers = []
    for i in range(20):
        print(get)
        worker = threading.Thread(target=analy.get)
        worker.start()
        workers.append(worker)
        print(i)

    # Wait for every worker.  With bare thread.start_new_thread the script
    # reached its end immediately and, on most platforms, the workers were
    # killed before they finished sleeping.
    for worker in workers:
        worker.join()
-->
参考网址:
http://wwwsearch.sourceforge.net/python-spidermonkey/
http://pypi.python.org/pypi/python-spidermonkey/0.0.1a
谢谢,看了有收获,好
ReplyDelete