1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- # coding: utf-8
- import urllib2
- from refresh_config import host_list, url_list
- import threading
# Shared by all Urltool threads so that each thread's result list is printed
# as one uninterrupted write.
threadLock = threading.Lock()


class Urltool(threading.Thread):
    """Thread that requests one URL path from every host in ``host_list``.

    Each request is sent to ``<http_type>://<host><url>`` with the ``Host``
    header forced to ``headers``, so the same virtual host can be checked on
    every backend listed in ``host_list``.

    After ``run()`` finishes, ``self.results`` holds one row per host:
    ``[Host header, request host, final URL or exception, code]`` where
    ``code`` is the HTTP status, or one of the sentinel values below.
    """

    USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36')

    # Seconds passed to urlopen() as its timeout.
    TIMEOUT = 3

    # Sentinel codes recorded when no HTTP status is available.
    CODE_URL_ERROR = 999        # urllib2.URLError (connection error)
    CODE_BAD_STATUS = 998       # httplib.BadStatusLine (data error)
    CODE_OTHER_ERROR = 997      # any other exception (timeout or other)

    def __init__(self, threadID, http_type, url, headers):
        """Store the probe parameters.

        threadID  -- identifier printed when the thread starts.
        http_type -- URL scheme, e.g. ``'http'``.
        url       -- path (and query string) appended after the host.
        headers   -- value sent as the ``Host`` request header.
        """
        threading.Thread.__init__(self)
        self.hostlist = host_list
        self.http_type = http_type
        self.url = url
        self.headers = headers
        self.threadID = threadID
        # Defined up front so the attributes exist even before run() is called.
        self.urls = []
        self.results = []

    def generaterl(self):
        """Build, store in ``self.urls`` and return one Request per host."""
        self.urls = [
            urllib2.Request(
                '%s://%s%s' % (self.http_type, host, self.url),
                headers={'Host': self.headers,
                         'User-Agent': self.USER_AGENT})
            for host in self.hostlist
        ]
        return self.urls

    def _probe(self, request):
        """Send ``request`` and return its result row.

        Never raises for a failed request: every exception from urlopen() is
        converted into a row carrying the exception and a sentinel code.
        """
        try:
            response = urllib2.urlopen(request, timeout=self.TIMEOUT)
        except urllib2.HTTPError as e:
            # Abnormal status code: the server answered with an error status.
            location, code = e.geturl(), e.code
        except urllib2.URLError as e:
            # Connection error.
            location, code = e, self.CODE_URL_ERROR
        except urllib2.httplib.BadStatusLine as e:
            # Data error: the status line could not be parsed.
            location, code = e, self.CODE_BAD_STATUS
        except Exception as e:
            # Timeout or anything else.
            location, code = e, self.CODE_OTHER_ERROR
        else:
            try:
                location, code = response.url, response.code
            finally:
                # Release the connection instead of waiting for the garbage
                # collector to do it.
                response.close()
        # Read the host fields only after the request was attempted, exactly
        # when the original code read them.
        return [request.get_header('Host'), request.get_host(), location, code]

    def run(self):
        """Probe every host, collect rows in ``self.results`` and print them."""
        print('number of %s task starting.' % self.threadID)
        self.generaterl()
        self.results = [self._probe(request) for request in self.urls]
        # The context manager releases the lock even if printing fails.
        with threadLock:
            print(self.results)
if __name__ == '__main__':
    # Start one Urltool thread per entry in url_list; the entry is used as
    # the Host header. Only 'www.test.com' is requested with a specific path,
    # every other entry is requested with an empty path.
    for intid, url in enumerate(url_list):
        if url == 'www.test.com':
            path = '/func.php?uid=12026277&size=big&time=1452578235'
        else:
            path = ''
        task = Urltool(intid, 'http', path, url)
        task.start()
|