#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
UFONet - Denial of Service Toolkit - 2013/2018 - by psy (epsylon@riseup.net)
You should have received a copy of the GNU General Public License along
with UFONet; if not, write to the Free Software Foundation, Inc., 51
Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
import urllib, urllib2, ssl, random, re
from urlparse import urlparse

# Inspector spidering class
class Inspector(object):
    def __init__(self, ufonet):
        self.ufonet = ufonet
        # set initial counters for objects
        self.c_images = 0
        self.c_mov = 0
        self.c_webm = 0
        self.c_avi = 0
        self.c_swf = 0
        self.c_mpg = 0
        self.c_mpeg = 0
        self.c_mp3 = 0
        self.c_mp4 = 0
        self.c_ogg = 0
        self.c_ogv = 0
        self.c_wmv = 0
        self.c_css = 0
        self.c_js = 0
        self.c_xml = 0
        self.c_php = 0
        self.c_html = 0
        self.c_jsp = 0
        self.c_asp = 0
        self.c_txt = 0
        self.ctx = ssl.create_default_context() # creating context to bypass SSL cert validation (black magic)
        self.ctx.check_hostname = False
        self.ctx.verify_mode = ssl.CERT_NONE

    def proxy_transport(self, proxy):
        proxy_url = self.ufonet.extract_proxy(proxy)
        proxy = urllib2.ProxyHandler({'https': proxy_url})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)
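    # Usage sketch: proxy_transport(proxy) hands the raw proxy string to
    # self.ufonet.extract_proxy() and installs the result as a module-wide
    # urllib2 opener, so every later urllib2.urlopen() call in this process
    # is routed through that HTTPS proxy (install_opener() replaces the
    # module-level default opener).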
    def inspecting(self, target):
        # inspect the sizes of the HTML target's components (ex: http://target.com/foo)
        # [images, .mov, .webm, .avi, .swf, .mpg, .mpeg, .mp3, .mp4, .ogg,
        # .ogv, .wmv, .css, .js, .xml, .php, .html, .jsp, .asp, .txt]
        biggest_files = {}
        self.ufonet.user_agent = random.choice(self.ufonet.agents).strip() # shuffle user-agent
        headers = {'User-Agent' : self.ufonet.user_agent, 'Referer' : self.ufonet.referer} # set fake user-agent and referer
        try:
            if self.ufonet.options.proxy: # set proxy
                self.proxy_transport(self.ufonet.options.proxy)
            req = urllib2.Request(target, None, headers)
            target_reply = urllib2.urlopen(req, context=self.ctx).read()
        except:
            print('[Error] [AI] Unable to connect to target -> [Exiting!]\n')
            return
        try: # search for image files
            regex_img = []
            regex_img1 = "<img src='(.+?)'" # search on target's results using regex with single quotation
            regex_img.append(regex_img1)
            regex_img2 = '<img src="(.+?)"' # search on target's results using regex with double quotation
            regex_img.append(regex_img2)
            #regex_img3 = '<img src=(.+?)>' # search on target's results using regex without quotations
            #regex_img.append(regex_img3)
            imgs = {}
            for regimg in regex_img:
                pattern_img = re.compile(regimg)
                img_links = re.findall(pattern_img, target_reply)
                for img in img_links:
                    self.ufonet.user_agent = random.choice(self.ufonet.agents).strip() # shuffle user-agent
                    headers = {'User-Agent' : self.ufonet.user_agent, 'Referer' : self.ufonet.referer} # set fake user-agent and referer
                    print('+Image found: ' + img)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if img.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(img, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + img, None, headers)
                        img_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(img_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Image -> [Discarding!]')
                        size = 0
                    imgs[img] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_images = self.c_images + 1
                    print '-'*12
            biggest_image = max(imgs.keys(), key=lambda x: imgs[x]) # search/extract biggest image value from dict
            biggest_files[biggest_image] = imgs[biggest_image] # add biggest image to list
        except: # if no image was found, go for next
            pass
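        # NOTE: each block below repeats the same pattern as the image search
        # above: regex-extract candidate links for one file type, download
        # each one to measure its size, record it in a per-type dict, and
        # push the biggest entry into biggest_files. A failed download is
        # recorded as size 0, and the bare except at the end of each block
        # skips the whole type when nothing matched (max() on an empty dict
        # raises ValueError).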
        try: # search for .mov files
            regex_mov = []
            regex_mov1 = "<a href='(.+?\.mov)'" # search on target's results using regex with single quotation
            regex_mov.append(regex_mov1)
            regex_mov2 = '<a href="(.+?\.mov)"' # search on target's results using regex with double quotation
            regex_mov.append(regex_mov2)
            #regex_mov3 = '<a href=(.+?\.mov)' # search on target's results using regex without quotations
            #regex_mov.append(regex_mov3)
            movs = {}
            for regmov in regex_mov:
                pattern_mov = re.compile(regmov)
                mov_links = re.findall(pattern_mov, target_reply)
                for mov in mov_links:
                    print('+Video (.mov) found: ' + mov)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if mov.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(mov, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + mov, None, headers)
                        mov_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(mov_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    movs[mov] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_mov = self.c_mov + 1
                    print '-'*12
            biggest_mov = max(movs.keys(), key=lambda x: movs[x]) # search/extract biggest video (.mov) value from dict
            biggest_files[biggest_mov] = movs[biggest_mov] # add biggest video (.mov) to list
        except: # if no .mov was found, go for next
            pass
        try: # search for .webm files
            regex_webm = []
            regex_webm1 = "<a href='(.+?\.webm)'" # search on target's results using regex with single quotation
            regex_webm.append(regex_webm1)
            regex_webm2 = '<a href="(.+?\.webm)"' # search on target's results using regex with double quotation
            regex_webm.append(regex_webm2)
            #regex_webm3 = '<a href=(.+?\.webm)' # search on target's results using regex without quotations
            #regex_webm.append(regex_webm3)
            webms = {}
            for regwebm in regex_webm:
                pattern_webm = re.compile(regwebm)
                webm_links = re.findall(pattern_webm, target_reply)
                for webm in webm_links:
                    print('+Video (.webm) found: ' + webm)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if webm.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(webm, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + webm, None, headers)
                        webm_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(webm_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    webms[webm] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_webm = self.c_webm + 1
                    print '-'*12
            biggest_webm = max(webms.keys(), key=lambda x: webms[x]) # search/extract biggest video (.webm) value from dict
            biggest_files[biggest_webm] = webms[biggest_webm] # add biggest video (.webm) to list
        except: # if no .webm was found, go for next
            pass
        try: # search for .avi files
            regex_avi = []
            regex_avi1 = "<a href='(.+?\.avi)'" # search on target's results using regex with single quotation
            regex_avi.append(regex_avi1)
            regex_avi2 = '<a href="(.+?\.avi)"' # search on target's results using regex with double quotation
            regex_avi.append(regex_avi2)
            #regex_avi3 = '<a href=(.+?\.avi)' # search on target's results using regex without quotations
            #regex_avi.append(regex_avi3)
            avis = {}
            for regavi in regex_avi:
                pattern_avi = re.compile(regavi)
                avi_links = re.findall(pattern_avi, target_reply)
                for avi in avi_links:
                    print('+Video (.avi) found: ' + avi)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if avi.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(avi, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + avi, None, headers)
                        avi_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(avi_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    avis[avi] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_avi = self.c_avi + 1
                    print '-'*12
            biggest_avi = max(avis.keys(), key=lambda x: avis[x]) # search/extract biggest video (.avi) value from dict
            biggest_files[biggest_avi] = avis[biggest_avi] # add biggest video (.avi) to list
        except: # if no .avi was found, go for next
            pass
        try: # search for .swf files
            regex_swf = []
            regex_swf1 = "<value='(.+?\.swf)'" # search on target's results using regex with single quotation
            regex_swf.append(regex_swf1)
            regex_swf2 = '<value="(.+?\.swf)"' # search on target's results using regex with double quotation
            regex_swf.append(regex_swf2)
            #regex_swf3 = '<value=(.+?\.swf)' # search on target's results using regex without quotations
            #regex_swf.append(regex_swf3)
            swfs = {}
            for regswf in regex_swf:
                pattern_swf = re.compile(regswf)
                swf_links = re.findall(pattern_swf, target_reply)
                for swf in swf_links:
                    print('+Flash (.swf) found: ' + swf)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if swf.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(swf, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + swf, None, headers)
                        swf_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(swf_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Flash -> [Discarding!]')
                        size = 0
                    swfs[swf] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_swf = self.c_swf + 1
                    print '-'*12
            biggest_swf = max(swfs.keys(), key=lambda x: swfs[x]) # search/extract biggest flash (.swf) value from dict
            biggest_files[biggest_swf] = swfs[biggest_swf] # add biggest flash (.swf) to list
        except: # if no .swf was found, go for next
            pass
        try: # search for .mpg files
            regex_mpg = []
            regex_mpg1 = "<src='(.+?\.mpg)'" # search on target's results using regex with single quotation
            regex_mpg.append(regex_mpg1)
            regex_mpg2 = '<src="(.+?\.mpg)"' # search on target's results using regex with double quotation
            regex_mpg.append(regex_mpg2)
            #regex_mpg3 = '<src=(.+?\.mpg)' # search on target's results using regex without quotations
            #regex_mpg.append(regex_mpg3)
            mpgs = {}
            for regmpg in regex_mpg:
                pattern_mpg = re.compile(regmpg)
                mpg_links = re.findall(pattern_mpg, target_reply)
                for mpg in mpg_links:
                    print('+Video (.mpg) found: ' + mpg)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if mpg.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(mpg, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + mpg, None, headers)
                        mpg_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(mpg_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    mpgs[mpg] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_mpg = self.c_mpg + 1
                    print '-'*12
            biggest_mpg = max(mpgs.keys(), key=lambda x: mpgs[x]) # search/extract biggest video (.mpg) value from dict
            biggest_files[biggest_mpg] = mpgs[biggest_mpg] # add biggest video (.mpg) to list
        except: # if no .mpg was found, go for next
            pass
        try: # search for .mpeg files
            regex_mpeg = []
            regex_mpeg1 = "<src='(.+?\.mpeg)'" # search on target's results using regex with single quotation
            regex_mpeg.append(regex_mpeg1)
            regex_mpeg2 = '<src="(.+?\.mpeg)"' # search on target's results using regex with double quotation
            regex_mpeg.append(regex_mpeg2)
            #regex_mpeg3 = '<src=(.+?\.mpeg)' # search on target's results using regex without quotations
            #regex_mpeg.append(regex_mpeg3)
            mpegs = {}
            for regmpeg in regex_mpeg:
                pattern_mpeg = re.compile(regmpeg)
                mpeg_links = re.findall(pattern_mpeg, target_reply)
                for mpeg in mpeg_links:
                    print('+Video (.mpeg) found: ' + mpeg)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if mpeg.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(mpeg, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + mpeg, None, headers)
                        mpeg_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(mpeg_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    mpegs[mpeg] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_mpeg = self.c_mpeg + 1
                    print '-'*12
            biggest_mpeg = max(mpegs.keys(), key=lambda x: mpegs[x]) # search/extract biggest video (.mpeg) value from dict
            biggest_files[biggest_mpeg] = mpegs[biggest_mpeg] # add biggest video (.mpeg) to list
        except: # if no .mpeg was found, go for next
            pass
        try: # search for .mp3 files
            regex_mp3 = []
            regex_mp31 = "<src='(.+?\.mp3)'" # search on target's results using regex with single quotation
            regex_mp3.append(regex_mp31)
            regex_mp32 = '<src="(.+?\.mp3)"' # search on target's results using regex with double quotation
            regex_mp3.append(regex_mp32)
            #regex_mp33 = '<src=(.+?\.mp3)' # search on target's results using regex without quotations
            #regex_mp3.append(regex_mp33)
            mp3s = {}
            for regmp3 in regex_mp3:
                pattern_mp3 = re.compile(regmp3)
                mp3_links = re.findall(pattern_mp3, target_reply)
                for mp3 in mp3_links:
                    print('+Audio (.mp3) found: ' + mp3)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if mp3.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(mp3, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + mp3, None, headers)
                        mp3_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(mp3_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Audio -> [Discarding!]')
                        size = 0
                    mp3s[mp3] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_mp3 = self.c_mp3 + 1
                    print '-'*12
            biggest_mp3 = max(mp3s.keys(), key=lambda x: mp3s[x]) # search/extract biggest audio (.mp3) value from dict
            biggest_files[biggest_mp3] = mp3s[biggest_mp3] # add biggest audio (.mp3) to list
        except: # if no .mp3 was found, go for next
            pass
        try: # search for .mp4 files
            regex_mp4 = []
            regex_mp41 = "<src='(.+?\.mp4)'" # search on target's results using regex with single quotation
            regex_mp4.append(regex_mp41)
            regex_mp42 = '<src="(.+?\.mp4)"' # search on target's results using regex with double quotation
            regex_mp4.append(regex_mp42)
            #regex_mp43 = '<src=(.+?\.mp4)' # search on target's results using regex without quotations
            #regex_mp4.append(regex_mp43)
            mp4s = {}
            for regmp4 in regex_mp4:
                pattern_mp4 = re.compile(regmp4)
                mp4_links = re.findall(pattern_mp4, target_reply)
                for mp4 in mp4_links:
                    print('+Video (.mp4) found: ' + mp4)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if mp4.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(mp4, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + mp4, None, headers)
                        mp4_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(mp4_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    mp4s[mp4] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_mp4 = self.c_mp4 + 1
                    print '-'*12
            biggest_mp4 = max(mp4s.keys(), key=lambda x: mp4s[x]) # search/extract biggest video (.mp4) value from dict
            biggest_files[biggest_mp4] = mp4s[biggest_mp4] # add biggest video (.mp4) to list
        except: # if no .mp4 was found, go for next
            pass
        try: # search for .ogg files
            regex_ogg = []
            regex_ogg1 = "<src='(.+?\.ogg)'" # search on target's results using regex with single quotation
            regex_ogg.append(regex_ogg1)
            regex_ogg2 = '<src="(.+?\.ogg)"' # search on target's results using regex with double quotation
            regex_ogg.append(regex_ogg2)
            #regex_ogg3 = '<src=(.+?\.ogg)' # search on target's results using regex without quotations
            #regex_ogg.append(regex_ogg3)
            oggs = {}
            for regogg in regex_ogg:
                pattern_ogg = re.compile(regogg)
                ogg_links = re.findall(pattern_ogg, target_reply)
                for ogg in ogg_links:
                    print('+Audio (.ogg) found: ' + ogg)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if ogg.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(ogg, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + ogg, None, headers)
                        ogg_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(ogg_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Audio -> [Discarding!]')
                        size = 0
                    oggs[ogg] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_ogg = self.c_ogg + 1
                    print '-'*12
            biggest_ogg = max(oggs.keys(), key=lambda x: oggs[x]) # search/extract biggest audio (.ogg) value from dict
            biggest_files[biggest_ogg] = oggs[biggest_ogg] # add biggest audio (.ogg) to list
        except: # if no .ogg was found, go for next
            pass
        try: # search for .ogv files
            regex_ogv = []
            regex_ogv1 = "<src='(.+?\.ogv)'" # search on target's results using regex with single quotation
            regex_ogv.append(regex_ogv1)
            regex_ogv2 = '<src="(.+?\.ogv)"' # search on target's results using regex with double quotation
            regex_ogv.append(regex_ogv2)
            #regex_ogv3 = '<src=(.+?\.ogv)' # search on target's results using regex without quotations
            #regex_ogv.append(regex_ogv3)
            ogvs = {}
            for regogv in regex_ogv:
                pattern_ogv = re.compile(regogv)
                ogv_links = re.findall(pattern_ogv, target_reply)
                for ogv in ogv_links:
                    print('+Video (.ogv) found: ' + ogv)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if ogv.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(ogv, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + ogv, None, headers)
                        ogv_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(ogv_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    ogvs[ogv] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_ogv = self.c_ogv + 1
                    print '-'*12
            biggest_ogv = max(ogvs.keys(), key=lambda x: ogvs[x]) # search/extract biggest video (.ogv) value from dict
            biggest_files[biggest_ogv] = ogvs[biggest_ogv] # add biggest video (.ogv) to list
        except: # if no .ogv was found, go for next
            pass
        try: # search for .wmv files
            regex_wmv = []
            regex_wmv1 = "<src='(.+?\.wmv)'" # search on target's results using regex with single quotation
            regex_wmv.append(regex_wmv1)
            regex_wmv2 = '<src="(.+?\.wmv)"' # search on target's results using regex with double quotation
            regex_wmv.append(regex_wmv2)
            #regex_wmv3 = '<src=(.+?\.wmv)' # search on target's results using regex without quotations
            #regex_wmv.append(regex_wmv3)
            wmvs = {}
            for regwmv in regex_wmv:
                pattern_wmv = re.compile(regwmv)
                wmv_links = re.findall(pattern_wmv, target_reply)
                for wmv in wmv_links:
                    print('+Video (.wmv) found: ' + wmv)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if wmv.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(wmv, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + wmv, None, headers)
                        wmv_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(wmv_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
                    wmvs[wmv] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_wmv = self.c_wmv + 1
                    print '-'*12
            biggest_wmv = max(wmvs.keys(), key=lambda x: wmvs[x]) # search/extract biggest video (.wmv) value from dict
            biggest_files[biggest_wmv] = wmvs[biggest_wmv] # add biggest video (.wmv) to list
        except: # if no .wmv was found, go for next
            pass
        try: # search for .css files
            regex_css = []
            regex_css1 = "href='(.+?\.css[^']*)'" # search on target's results using regex with single quotation
            regex_css.append(regex_css1)
            regex_css2 = 'href="(.+?\.css[^"]*)"' # search on target's results using regex with double quotation
            regex_css.append(regex_css2)
            #regex_css3 = "href=(.+?\.css[^']*)" # search on target's results using regex without quotations
            #regex_css.append(regex_css3)
            csss = {}
            for regcss in regex_css:
                pattern_css = re.compile(regcss)
                css_links = re.findall(pattern_css, target_reply)
                for css in css_links:
                    print('+Style (.css) found: ' + css)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if css.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(css, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + css, None, headers)
                        css_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(css_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Style -> [Discarding!]')
                        size = 0
                    csss[css] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_css = self.c_css + 1
                    print '-'*12
            biggest_css = max(csss.keys(), key=lambda x: csss[x]) # search/extract biggest style (.css) value from dict
            biggest_files[biggest_css] = csss[biggest_css] # add biggest style (.css) to list
        except: # if no .css was found, go for next
            pass
        try: # search for .js files
            regex_js = []
            regex_js1 = "src='(.+?\.js[^']*)'" # search on target's results using regex with single quotation
            regex_js.append(regex_js1)
            regex_js2 = 'src="(.+?\.js[^"]*)"' # search on target's results using regex with double quotation
            regex_js.append(regex_js2)
            #regex_js3 = "src=(.+?\.js[^']*)" # search on target's results using regex without quotations
            #regex_js.append(regex_js3)
            jss = {}
            for regjs in regex_js:
                pattern_js = re.compile(regjs)
                js_links = re.findall(pattern_js, target_reply)
                for js in js_links:
                    print('+Script (.js) found: ' + js)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if js.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(js, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + js, None, headers)
                        js_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(js_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Script -> [Discarding!]')
                        size = 0
                    jss[js] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_js = self.c_js + 1
                    print '-'*12
            biggest_js = max(jss.keys(), key=lambda x: jss[x]) # search/extract biggest script (.js) value from dict
            biggest_files[biggest_js] = jss[biggest_js] # add biggest script (.js) to list
        except: # if no .js was found, go for next
            pass
        try: # search for .xml files
            regex_xml = []
            regex_xml1 = "href='(.+?\.xml)'" # search on target's results using regex with single quotation
            regex_xml.append(regex_xml1)
            regex_xml2 = 'href="(.+?\.xml)"' # search on target's results using regex with double quotation
            regex_xml.append(regex_xml2)
            #regex_xml3 = 'href=(.+?\.xml)' # search on target's results using regex without quotations
            #regex_xml.append(regex_xml3)
            xmls = {}
            for regxml in regex_xml:
                pattern_xml = re.compile(regxml)
                xml_links = re.findall(pattern_xml, target_reply)
                for xml in xml_links:
                    print('+Script (.xml) found: ' + xml)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if xml.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(xml, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + xml, None, headers)
                        xml_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(xml_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Script -> [Discarding!]')
                        size = 0
                    xmls[xml] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_xml = self.c_xml + 1
                    print '-'*12
            biggest_xml = max(xmls.keys(), key=lambda x: xmls[x]) # search/extract biggest script (.xml) value from dict
            biggest_files[biggest_xml] = xmls[biggest_xml] # add biggest script (.xml) to list
        except: # if no .xml was found, go for next
            pass
        try: # search for .php files
            regex_php = []
            regex_php1 = "href='(.+?\.php)'" # search on target's results using regex with single quotation
            regex_php.append(regex_php1)
            regex_php2 = 'href="(.+?\.php)"' # search on target's results using regex with double quotation
            regex_php.append(regex_php2)
            #regex_php3 = 'href=(.+?\.php)' # search on target's results using regex without quotations
            #regex_php.append(regex_php3)
            phps = {}
            for regphp in regex_php:
                pattern_php = re.compile(regphp)
                php_links = re.findall(pattern_php, target_reply)
                for php in php_links:
                    print('+Webpage (.php) found: ' + php)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if php.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(php, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + php, None, headers)
                        php_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(php_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                    phps[php] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_php = self.c_php + 1
                    print '-'*12
            biggest_php = max(phps.keys(), key=lambda x: phps[x]) # search/extract biggest file (.php) value from dict
            biggest_files[biggest_php] = phps[biggest_php] # add biggest file (.php) to list
        except: # if no .php was found, go for next
            pass
        try: # search for .html files
            regex_html = []
            regex_html1 = "href='(.+?\.html)'" # search on target's results using regex with single quotation
            regex_html.append(regex_html1)
            regex_html2 = 'href="(.+?\.html)"' # search on target's results using regex with double quotation
            regex_html.append(regex_html2)
            #regex_html3 = 'href=(.+?\.html)' # search on target's results using regex without quotations
            #regex_html.append(regex_html3)
            htmls = {}
            for reghtml in regex_html:
                pattern_html = re.compile(reghtml)
                html_links = re.findall(pattern_html, target_reply)
                for html in html_links:
                    print('+Webpage (.html) found: ' + html)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if html.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(html, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + html, None, headers)
                        html_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(html_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                    htmls[html] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_html = self.c_html + 1
                    print '-'*12
            biggest_html = max(htmls.keys(), key=lambda x: htmls[x]) # search/extract biggest file (.html) value from dict
            biggest_files[biggest_html] = htmls[biggest_html] # add biggest file (.html) to list
        except: # if no .html was found, go for next
            pass
        try: # search for .jsp files
            regex_jsp = []
            regex_jsp1 = "href='(.+?\.jsp)'" # search on target's results using regex with single quotation
            regex_jsp.append(regex_jsp1)
            regex_jsp2 = 'href="(.+?\.jsp)"' # search on target's results using regex with double quotation
            regex_jsp.append(regex_jsp2)
            #regex_jsp3 = 'href=(.+?\.jsp)' # search on target's results using regex without quotations
            #regex_jsp.append(regex_jsp3)
            jsps = {}
            for regjsp in regex_jsp:
                pattern_jsp = re.compile(regjsp)
                jsp_links = re.findall(pattern_jsp, target_reply)
                for jsp in jsp_links:
                    print('+Webpage (.jsp) found: ' + jsp)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if jsp.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(jsp, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + jsp, None, headers)
                        jsp_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(jsp_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                    jsps[jsp] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_jsp = self.c_jsp + 1
                    print '-'*12
            biggest_jsp = max(jsps.keys(), key=lambda x: jsps[x]) # search/extract biggest file (.jsp) value from dict
            biggest_files[biggest_jsp] = jsps[biggest_jsp] # add biggest file (.jsp) to list
        except: # if no .jsp was found, go for next
            pass
        try: # search for .asp files
            regex_asp = []
            regex_asp1 = "href='(.+?\.asp)'" # search on target's results using regex with single quotation
            regex_asp.append(regex_asp1)
            regex_asp2 = 'href="(.+?\.asp)"' # search on target's results using regex with double quotation
            regex_asp.append(regex_asp2)
            #regex_asp3 = 'href=(.+?\.asp)' # search on target's results using regex without quotations
            #regex_asp.append(regex_asp3)
            asps = {}
            for regasp in regex_asp:
                pattern_asp = re.compile(regasp)
                asp_links = re.findall(pattern_asp, target_reply)
                for asp in asp_links:
                    print('+Webpage (.asp) found: ' + asp)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if asp.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(asp, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + asp, None, headers)
                        asp_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(asp_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
                    asps[asp] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_asp = self.c_asp + 1
                    print '-'*12
            biggest_asp = max(asps.keys(), key=lambda x: asps[x]) # search/extract biggest file (.asp) value from dict
            biggest_files[biggest_asp] = asps[biggest_asp] # add biggest file (.asp) to list
        except: # if no .asp was found, go for next
            pass
        try: # search for .txt files
            regex_txt = []
            regex_txt1 = "href='(.+?\.txt)'" # search on target's results using regex with single quotation
            regex_txt.append(regex_txt1)
            regex_txt2 = 'href="(.+?\.txt)"' # search on target's results using regex with double quotation
            regex_txt.append(regex_txt2)
            #regex_txt3 = 'href=(.+?\.txt)' # search on target's results using regex without quotations
            #regex_txt.append(regex_txt3)
            txts = {}
            for regtxt in regex_txt:
                pattern_txt = re.compile(regtxt)
                txt_links = re.findall(pattern_txt, target_reply)
                for txt in txt_links:
                    print('+File (.txt) found: ' + txt)
                    try:
                        if self.ufonet.options.proxy: # set proxy
                            self.proxy_transport(self.ufonet.options.proxy)
                        if txt.startswith('http'): # absolute link -> fetch it directly
                            req = urllib2.Request(txt, None, headers)
                        else: # relative link -> resolve against target
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            req = urllib2.Request(target_url + txt, None, headers)
                        txt_file = urllib2.urlopen(req, context=self.ctx).read()
                        size = len(txt_file)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Text file -> [Discarding!]')
                        size = 0
                    txts[txt] = int(size)
                    print('(Size: ' + str(size) + ' Bytes)')
                    self.c_txt = self.c_txt + 1
                    print '-'*12
            biggest_txt = max(txts.keys(), key=lambda x: txts[x]) # search/extract biggest file (.txt) value from dict
            biggest_files[biggest_txt] = txts[biggest_txt] # add biggest file (.txt) to list
        except: # if no .txt was found, go for next
            pass
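        # At this point biggest_files holds one candidate per file type,
        # e.g. {'logo.png': 51200, '/media/intro.mov': 1048576}; the code
        # below prints the per-type counters and then picks the single
        # largest candidate with max(..., key=...) -> '/media/intro.mov'
        # in this (hypothetical) example.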
        print "\n", '='*80
        total_objects = self.c_images + self.c_mov + self.c_webm + self.c_avi + self.c_swf + self.c_mpg + self.c_mpeg + self.c_mp3 + self.c_mp4 + self.c_ogg + self.c_ogv + self.c_wmv + self.c_css + self.c_js + self.c_xml + self.c_php + self.c_html + self.c_jsp + self.c_asp + self.c_txt
        print "Total objects found:", total_objects
        print '-'*20
        print "images:", self.c_images
        print ".mov  :", self.c_mov
        print ".webm :", self.c_webm
        print ".avi  :", self.c_avi
        print ".swf  :", self.c_swf
        print ".mpg  :", self.c_mpg
        print ".mpeg :", self.c_mpeg
        print ".mp3  :", self.c_mp3
        print ".mp4  :", self.c_mp4
        print ".ogg  :", self.c_ogg
        print ".ogv  :", self.c_ogv
        print ".wmv  :", self.c_wmv
        print ".css  :", self.c_css
        print ".js   :", self.c_js
        print ".xml  :", self.c_xml
        print ".php  :", self.c_php
        print ".html :", self.c_html
        print ".jsp  :", self.c_jsp
        print ".asp  :", self.c_asp
        print ".txt  :", self.c_txt
        print '-'*20
        print '='*80
        if not biggest_files: # no links found on target
            print "\n[Info] [AI] No links found on target! -> [Exiting!]\n\n"
            print '='*80 + '\n'
            return
        biggest_file_on_target = max(biggest_files.keys(), key=lambda x: biggest_files[x]) # search/extract biggest file value from dict
        target_host = urlparse(target)
        target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
        if biggest_file_on_target.startswith('http'): # used for absolute links
            for url, size in biggest_files.items(): # review all dict values
                if url.startswith('http'):
                    if not target_url in url: # dismiss external links
                        del biggest_files[url] # remove value from dict
            if not biggest_files: # only external links were found
                print "\n[Info] [AI] No links found on target! -> [Exiting!]\n\n"
                print '='*80 + '\n'
                return
            biggest_file_on_target = max(biggest_files.keys(), key=lambda x: biggest_files[x]) # extract new value
            print('=Biggest File: ' + biggest_file_on_target)
        else: # used for relative links
            if not target_url.endswith('/'): # add "/" to end of target
                target_url = target_url + "/"
            print('=Biggest File: ' + target_url + biggest_file_on_target)
        print '='*80 + '\n'
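
# Minimal usage sketch (hypothetical; not part of UFONet). Inspector only
# needs an object exposing .agents, .referer, .options.proxy and
# .extract_proxy(), so a small stub is enough to drive it standalone:
if __name__ == "__main__":
    class _Options(object): # stand-in for UFONet's parsed CLI options
        proxy = None
    class _Stub(object): # stand-in for the main UFONet object
        agents = ['Mozilla/5.0 (X11; Linux x86_64)']
        referer = 'http://127.0.0.1/'
        options = _Options()
        def extract_proxy(self, proxy): # UFONet normally parses "host:port" here
            return proxy
    Inspector(_Stub()).inspecting('http://target.com/') # example target from the docstring above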