#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file is part of the UFONet project, https://ufonet.03c8.net
Copyright (c) 2013/2020 | psy <epsylon@riseup.net>
You should have received a copy of the GNU General Public License along
with UFONet; if not, write to the Free Software Foundation, Inc., 51
Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
"""
import ssl, random, re
import urllib.request, urllib.error
from urllib.parse import urlparse

# Inspector spidering class
class Inspector(object):
    def __init__(self, ufonet):
        self.ufonet = ufonet
        # set initial counters for objects
        self.c_images = 0
        self.c_mov = 0
        self.c_webm = 0
        self.c_avi = 0
        self.c_swf = 0
        self.c_mpg = 0
        self.c_mpeg = 0
        self.c_mp3 = 0
        self.c_mp4 = 0 # was missing: the .mp4 scan below increments it
        self.c_ogg = 0
        self.c_ogv = 0
        self.c_wmv = 0
        self.c_css = 0
        self.c_js = 0
        self.c_xml = 0
        self.c_php = 0
        self.c_html = 0
        self.c_jsp = 0
        self.c_asp = 0
        self.c_txt = 0
        self.ctx = ssl.create_default_context() # creating context to bypass SSL cert validation (black magic)
        self.ctx.check_hostname = False
        self.ctx.verify_mode = ssl.CERT_NONE

    def proxy_transport(self, proxy):
        proxy_url = self.ufonet.extract_proxy(proxy)
        proxy = urllib.request.ProxyHandler({'https': proxy_url})
        opener = urllib.request.build_opener(proxy)
        urllib.request.install_opener(opener)
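
    # NOTE: urllib.request.install_opener() replaces the process-wide default
    # opener, so once proxy_transport() has run, every urllib.request.urlopen()
    # call in this process is routed through the proxy; that is why
    # inspecting() re-applies the proxy right before each request.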
    def inspecting(self, target):
        # inspect the sizes of the HTML target's components (ex: http://target.com/foo)
        # [images, .mov, .webm, .avi, .swf, .mpg, .mpeg, .mp3, .mp4, .ogg, .ogv,
        #  .wmv, .css, .js, .xml, .php, .html, .jsp, .asp, .txt]
        biggest_files = {}
        if not target.endswith('/'): # add "/" to end of target
            target = target + "/"
        self.ufonet.user_agent = random.choice(self.ufonet.agents).strip() # shuffle user-agent
        headers = {'User-Agent': self.ufonet.user_agent, 'Referer': self.ufonet.referer} # set fake user-agent and referer
        try:
            if self.ufonet.options.proxy: # set proxy
                self.proxy_transport(self.ufonet.options.proxy)
            req = urllib.request.Request(target, None, headers)
            target_reply = urllib.request.urlopen(req, context=self.ctx).read().decode('utf-8')
        except:
            print('[Error] [AI] Unable to connect to target -> [Exiting!]\n')
            return
        try: # search for image files
            regex_img = []
            regex_img1 = "<img src='(.+?)'" # match with single quotes
            regex_img.append(regex_img1)
            regex_img2 = '<img src="(.+?)"' # match with double quotes
            regex_img.append(regex_img2)
            #regex_img3 = '<img src=(.+?)>' # match without quotes
            #regex_img.append(regex_img3)
            imgs = {} # accumulate across all patterns
            for regimg in regex_img:
                pattern_img = re.compile(regimg)
                img_links = re.findall(pattern_img, target_reply)
                for img in img_links:
                    if self.ufonet.options.proxy: # set proxy
                        self.proxy_transport(self.ufonet.options.proxy)
                    self.ufonet.user_agent = random.choice(self.ufonet.agents).strip() # shuffle user-agent
                    headers = {'User-Agent': self.ufonet.user_agent, 'Referer': self.ufonet.referer} # set fake user-agent and referer
                    try:
                        if img.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            if img.startswith("data:image"): # discard inline images
                                size = 0
                            else:
                                if img.startswith('/'):
                                    img = img.replace("/", "", 1)
                                try:
                                    req = urllib.request.Request(target_url + img, None, headers)
                                    img_file = urllib.request.urlopen(req, context=self.ctx).read()
                                    print('+Image found: ' + target_url + img.split('"')[0])
                                    size = len(img_file)
                                    imgs[img] = size
                                    print('(Size: ' + str(size) + ' Bytes)')
                                    self.c_images += 1
                                    print('-'*12)
                                except:
                                    size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Image -> [Discarding!]')
                        size = 0
            if imgs:
                biggest_image = max(imgs, key=imgs.get) # extract biggest image from dict
                biggest_files[biggest_image] = imgs[biggest_image] # add biggest image to candidates
        except: # if no images found, go for the next object type
            pass
        try: # search for .mov files
            regex_mov = []
            regex_mov1 = r"<a href='(.+?\.mov)'" # match with single quotes
            regex_mov.append(regex_mov1)
            regex_mov2 = r'<a href="(.+?\.mov)"' # match with double quotes
            regex_mov.append(regex_mov2)
            #regex_mov3 = r'<a href=(.+?\.mov)' # match without quotes
            #regex_mov.append(regex_mov3)
            movs = {} # accumulate across all patterns
            for regmov in regex_mov:
                pattern_mov = re.compile(regmov)
                mov_links = re.findall(pattern_mov, target_reply)
                for mov in mov_links:
                    try:
                        if mov.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mov, None, headers)
                                mov_file = urllib.request.urlopen(req, context=self.ctx).read() # raw bytes: binary media must not be utf-8 decoded
                                print('+Video (.mov) found: ' + target_url + mov.split('"')[0])
                                size = len(mov_file)
                                movs[mov] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mov += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if movs:
                biggest_mov = max(movs, key=movs.get) # extract biggest .mov from dict
                biggest_files[biggest_mov] = movs[biggest_mov] # add biggest .mov to candidates
        except: # if no .mov found, go for the next object type
            pass
        try: # search for .webm files
            regex_webm = []
            regex_webm1 = r"<a href='(.+?\.webm)'" # match with single quotes
            regex_webm.append(regex_webm1)
            regex_webm2 = r'<a href="(.+?\.webm)"' # match with double quotes
            regex_webm.append(regex_webm2)
            #regex_webm3 = r'<a href=(.+?\.webm)' # match without quotes
            #regex_webm.append(regex_webm3)
            webms = {} # accumulate across all patterns
            for regwebm in regex_webm:
                pattern_webm = re.compile(regwebm)
                webm_links = re.findall(pattern_webm, target_reply)
                for webm in webm_links:
                    try:
                        if webm.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + webm, None, headers)
                                webm_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.webm) found: ' + target_url + webm.split('"')[0])
                                size = len(webm_file)
                                webms[webm] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_webm += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if webms:
                biggest_webm = max(webms, key=webms.get) # extract biggest .webm from dict
                biggest_files[biggest_webm] = webms[biggest_webm] # add biggest .webm to candidates
        except: # if no .webm found, go for the next object type
            pass
        try: # search for .avi files
            regex_avi = []
            regex_avi1 = r"<a href='(.+?\.avi)'" # match with single quotes
            regex_avi.append(regex_avi1)
            regex_avi2 = r'<a href="(.+?\.avi)"' # match with double quotes
            regex_avi.append(regex_avi2)
            #regex_avi3 = r'<a href=(.+?\.avi)' # match without quotes
            #regex_avi.append(regex_avi3)
            avis = {} # accumulate across all patterns
            for regavi in regex_avi:
                pattern_avi = re.compile(regavi)
                avi_links = re.findall(pattern_avi, target_reply)
                for avi in avi_links:
                    try:
                        if avi.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + avi, None, headers)
                                avi_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.avi) found: ' + target_url + avi.split('"')[0])
                                size = len(avi_file)
                                avis[avi] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_avi += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if avis:
                biggest_avi = max(avis, key=avis.get) # extract biggest .avi from dict
                biggest_files[biggest_avi] = avis[biggest_avi] # add biggest .avi to candidates
        except: # if no .avi found, go for the next object type
            pass
        try: # search for .swf files
            regex_swf = []
            regex_swf1 = r"<value='(.+?\.swf)'" # match with single quotes
            regex_swf.append(regex_swf1)
            regex_swf2 = r'<value="(.+?\.swf)"' # match with double quotes
            regex_swf.append(regex_swf2)
            #regex_swf3 = r'<value=(.+?\.swf)' # match without quotes
            #regex_swf.append(regex_swf3)
            swfs = {} # accumulate across all patterns
            for regswf in regex_swf:
                pattern_swf = re.compile(regswf)
                swf_links = re.findall(pattern_swf, target_reply)
                for swf in swf_links:
                    try:
                        if swf.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + swf, None, headers)
                                swf_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Flash (.swf) found: ' + target_url + swf.split('"')[0])
                                size = len(swf_file)
                                swfs[swf] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_swf += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Flash -> [Discarding!]')
                        size = 0
            if swfs:
                biggest_swf = max(swfs, key=swfs.get) # extract biggest .swf from dict
                biggest_files[biggest_swf] = swfs[biggest_swf] # add biggest .swf to candidates
        except: # if no .swf found, go for the next object type
            pass
        try: # search for .mpg files
            regex_mpg = []
            regex_mpg1 = r"<src='(.+?\.mpg)'" # match with single quotes
            regex_mpg.append(regex_mpg1)
            regex_mpg2 = r'<src="(.+?\.mpg)"' # match with double quotes
            regex_mpg.append(regex_mpg2)
            #regex_mpg3 = r'<src=(.+?\.mpg)' # match without quotes
            #regex_mpg.append(regex_mpg3)
            mpgs = {} # accumulate across all patterns
            for regmpg in regex_mpg:
                pattern_mpg = re.compile(regmpg)
                mpg_links = re.findall(pattern_mpg, target_reply)
                for mpg in mpg_links:
                    try:
                        if mpg.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mpg, None, headers)
                                mpg_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.mpg) found: ' + target_url + mpg.split('"')[0])
                                size = len(mpg_file)
                                mpgs[mpg] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mpg += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if mpgs:
                biggest_mpg = max(mpgs, key=mpgs.get) # extract biggest .mpg from dict
                biggest_files[biggest_mpg] = mpgs[biggest_mpg] # add biggest .mpg to candidates
        except: # if no .mpg found, go for the next object type
            pass
        try: # search for .mpeg files
            regex_mpeg = []
            regex_mpeg1 = r"<src='(.+?\.mpeg)'" # match with single quotes
            regex_mpeg.append(regex_mpeg1)
            regex_mpeg2 = r'<src="(.+?\.mpeg)"' # match with double quotes
            regex_mpeg.append(regex_mpeg2)
            #regex_mpeg3 = r'<src=(.+?\.mpeg)' # match without quotes
            #regex_mpeg.append(regex_mpeg3)
            mpegs = {} # accumulate across all patterns
            for regmpeg in regex_mpeg:
                pattern_mpeg = re.compile(regmpeg)
                mpeg_links = re.findall(pattern_mpeg, target_reply)
                for mpeg in mpeg_links:
                    try:
                        if mpeg.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mpeg, None, headers)
                                mpeg_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.mpeg) found: ' + target_url + mpeg.split('"')[0])
                                size = len(mpeg_file)
                                mpegs[mpeg] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mpeg += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if mpegs:
                biggest_mpeg = max(mpegs, key=mpegs.get) # extract biggest .mpeg from dict
                biggest_files[biggest_mpeg] = mpegs[biggest_mpeg] # add biggest .mpeg to candidates
        except: # if no .mpeg found, go for the next object type
            pass
        try: # search for .mp3 files
            regex_mp3 = []
            regex_mp31 = r"<src='(.+?\.mp3)'" # match with single quotes
            regex_mp3.append(regex_mp31)
            regex_mp32 = r'<src="(.+?\.mp3)"' # match with double quotes
            regex_mp3.append(regex_mp32)
            #regex_mp33 = r'<src=(.+?\.mp3)' # match without quotes
            #regex_mp3.append(regex_mp33)
            mp3s = {} # accumulate across all patterns
            for regmp3 in regex_mp3:
                pattern_mp3 = re.compile(regmp3)
                mp3_links = re.findall(pattern_mp3, target_reply)
                for mp3 in mp3_links:
                    try:
                        if mp3.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mp3, None, headers)
                                mp3_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Audio (.mp3) found: ' + target_url + mp3.split('"')[0])
                                size = len(mp3_file)
                                mp3s[mp3] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mp3 += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Audio -> [Discarding!]')
                        size = 0
            if mp3s:
                biggest_mp3 = max(mp3s, key=mp3s.get) # extract biggest .mp3 from dict
                biggest_files[biggest_mp3] = mp3s[biggest_mp3] # add biggest .mp3 to candidates
        except: # if no .mp3 found, go for the next object type
            pass
        try: # search for .mp4 files
            regex_mp4 = []
            regex_mp41 = r"<src='(.+?\.mp4)'" # match with single quotes
            regex_mp4.append(regex_mp41)
            regex_mp42 = r'<src="(.+?\.mp4)"' # match with double quotes
            regex_mp4.append(regex_mp42)
            #regex_mp43 = r'<src=(.+?\.mp4)' # match without quotes
            #regex_mp4.append(regex_mp43)
            mp4s = {} # accumulate across all patterns
            for regmp4 in regex_mp4:
                pattern_mp4 = re.compile(regmp4)
                mp4_links = re.findall(pattern_mp4, target_reply)
                for mp4 in mp4_links:
                    try:
                        if mp4.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + mp4, None, headers)
                                mp4_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.mp4) found: ' + target_url + mp4.split('"')[0])
                                size = len(mp4_file)
                                mp4s[mp4] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_mp4 += 1 # counter is now initialized in __init__
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if mp4s:
                biggest_mp4 = max(mp4s, key=mp4s.get) # extract biggest .mp4 from dict
                biggest_files[biggest_mp4] = mp4s[biggest_mp4] # add biggest .mp4 to candidates
        except: # if no .mp4 found, go for the next object type
            pass
        try: # search for .ogg files
            regex_ogg = []
            regex_ogg1 = r"<src='(.+?\.ogg)'" # match with single quotes
            regex_ogg.append(regex_ogg1)
            regex_ogg2 = r'<src="(.+?\.ogg)"' # match with double quotes
            regex_ogg.append(regex_ogg2)
            #regex_ogg3 = r'<src=(.+?\.ogg)' # match without quotes
            #regex_ogg.append(regex_ogg3)
            oggs = {} # accumulate across all patterns
            for regogg in regex_ogg:
                pattern_ogg = re.compile(regogg)
                ogg_links = re.findall(pattern_ogg, target_reply)
                for ogg in ogg_links:
                    try:
                        if ogg.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + ogg, None, headers)
                                ogg_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Audio (.ogg) found: ' + target_url + ogg.split('"')[0])
                                size = len(ogg_file)
                                oggs[ogg] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_ogg += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Audio -> [Discarding!]')
                        size = 0
            if oggs:
                biggest_ogg = max(oggs, key=oggs.get) # extract biggest audio (.ogg) from dict
                biggest_files[biggest_ogg] = oggs[biggest_ogg] # add biggest .ogg to candidates
        except: # if no .ogg found, go for the next object type
            pass
        try: # search for .ogv files
            regex_ogv = []
            regex_ogv1 = r"<src='(.+?\.ogv)'" # match with single quotes
            regex_ogv.append(regex_ogv1)
            regex_ogv2 = r'<src="(.+?\.ogv)"' # match with double quotes
            regex_ogv.append(regex_ogv2)
            #regex_ogv3 = r'<src=(.+?\.ogv)' # match without quotes
            #regex_ogv.append(regex_ogv3)
            ogvs = {} # accumulate across all patterns
            for regogv in regex_ogv:
                pattern_ogv = re.compile(regogv)
                ogv_links = re.findall(pattern_ogv, target_reply)
                for ogv in ogv_links:
                    try:
                        if ogv.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + ogv, None, headers)
                                ogv_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.ogv) found: ' + target_url + ogv.split('"')[0])
                                size = len(ogv_file)
                                ogvs[ogv] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_ogv += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if ogvs:
                biggest_ogv = max(ogvs, key=ogvs.get) # extract biggest .ogv from dict
                biggest_files[biggest_ogv] = ogvs[biggest_ogv] # add biggest .ogv to candidates
        except: # if no .ogv found, go for the next object type
            pass
        try: # search for .wmv files
            regex_wmv = []
            regex_wmv1 = r"<src='(.+?\.wmv)'" # match with single quotes
            regex_wmv.append(regex_wmv1)
            regex_wmv2 = r'<src="(.+?\.wmv)"' # match with double quotes
            regex_wmv.append(regex_wmv2)
            #regex_wmv3 = r'<src=(.+?\.wmv)' # match without quotes
            #regex_wmv.append(regex_wmv3)
            wmvs = {} # accumulate across all patterns
            for regwmv in regex_wmv:
                pattern_wmv = re.compile(regwmv)
                wmv_links = re.findall(pattern_wmv, target_reply)
                for wmv in wmv_links:
                    try:
                        if wmv.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + wmv, None, headers)
                                wmv_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Video (.wmv) found: ' + target_url + wmv.split('"')[0])
                                size = len(wmv_file)
                                wmvs[wmv] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_wmv += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Video -> [Discarding!]')
                        size = 0
            if wmvs:
                biggest_wmv = max(wmvs, key=wmvs.get) # extract biggest .wmv from dict
                biggest_files[biggest_wmv] = wmvs[biggest_wmv] # add biggest .wmv to candidates
        except: # if no .wmv found, go for the next object type
            pass
        try: # search for .css files
            regex_css = []
            regex_css1 = r"href='(.+?\.css[^']*)'" # match with single quotes
            regex_css.append(regex_css1)
            regex_css2 = r'href="(.+?\.css[^"]*)"' # match with double quotes
            regex_css.append(regex_css2)
            #regex_css3 = r"href=(.+?\.css[^']*)" # match without quotes
            #regex_css.append(regex_css3)
            csss = {} # accumulate across all patterns
            for regcss in regex_css:
                pattern_css = re.compile(regcss)
                css_links = re.findall(pattern_css, target_reply)
                for css in css_links:
                    try:
                        if css.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            if css.startswith("//"): # discard protocol-relative links
                                size = 0
                            elif "http://" in css or "https://" in css: # discard embedded absolute links
                                size = 0
                            else:
                                if css.startswith('/'):
                                    css = css.replace("/", "", 1)
                                try:
                                    if self.ufonet.options.proxy: # set proxy
                                        self.proxy_transport(self.ufonet.options.proxy)
                                    req = urllib.request.Request(target_url + css, None, headers)
                                    css_file = urllib.request.urlopen(req, context=self.ctx).read()
                                    print('+Style (.css) found: ' + target_url + css.split('"')[0])
                                    size = len(css_file)
                                    csss[css] = size
                                    print('(Size: ' + str(size) + ' Bytes)')
                                    self.c_css += 1
                                    print('-'*12)
                                except:
                                    size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Style -> [Discarding!]')
                        size = 0
            if csss:
                biggest_css = max(csss, key=csss.get) # extract biggest stylesheet from dict
                biggest_files[biggest_css] = csss[biggest_css] # add biggest .css to candidates
        except: # if no .css found, go for the next object type
            pass
        try: # search for .js files
            regex_js = []
            regex_js1 = r"src='(.+?\.js[^']*)'" # match with single quotes
            regex_js.append(regex_js1)
            regex_js2 = r'src="(.+?\.js[^"]*)"' # match with double quotes
            regex_js.append(regex_js2)
            #regex_js3 = r"src=(.+?\.js[^']*)" # match without quotes
            #regex_js.append(regex_js3)
            jss = {} # accumulate across all patterns
            for regjs in regex_js:
                pattern_js = re.compile(regjs)
                js_links = re.findall(pattern_js, target_reply)
                for js in js_links:
                    try:
                        if js.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            if js.startswith("//"): # discard protocol-relative links
                                size = 0
                            elif "http://" in js or "https://" in js: # discard embedded absolute links
                                size = 0
                            else:
                                if js.startswith('/'):
                                    js = js.replace("/", "", 1)
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + js, None, headers)
                                js_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Script (.js) found: ' + target_url + js.split('"')[0])
                                size = len(js_file)
                                jss[js] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_js += 1
                                print('-'*12)
                    except:
                        print('[Error] [AI] Unable to retrieve info from Script -> [Discarding!]')
                        size = 0
            if jss:
                biggest_js = max(jss, key=jss.get) # extract biggest script from dict
                biggest_files[biggest_js] = jss[biggest_js] # add biggest .js to candidates
        except: # if no .js found, go for the next object type
            pass
        try: # search for .xml files
            regex_xml = []
            regex_xml1 = r"href='(.+?\.xml)'" # match with single quotes
            regex_xml.append(regex_xml1)
            regex_xml2 = r'href="(.+?\.xml)"' # match with double quotes
            regex_xml.append(regex_xml2)
            #regex_xml3 = r'href=(.+?\.xml)' # match without quotes
            #regex_xml.append(regex_xml3)
            xmls = {} # accumulate across all patterns
            for regxml in regex_xml:
                pattern_xml = re.compile(regxml)
                xml_links = re.findall(pattern_xml, target_reply)
                for xml in xml_links:
                    try:
                        if xml.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + xml, None, headers)
                                xml_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Script (.xml) found: ' + target_url + xml.split('"')[0])
                                size = len(xml_file)
                                xmls[xml] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_xml += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Script -> [Discarding!]')
                        size = 0
            if xmls:
                biggest_xml = max(xmls, key=xmls.get) # extract biggest .xml from dict
                biggest_files[biggest_xml] = xmls[biggest_xml] # add biggest .xml to candidates
        except: # if no .xml found, go for the next object type
            pass
        try: # search for .php files
            regex_php = []
            regex_php1 = r"href='(.+?\.php)'" # match with single quotes
            regex_php.append(regex_php1)
            regex_php2 = r'href="(.+?\.php)"' # match with double quotes
            regex_php.append(regex_php2)
            #regex_php3 = r'href=(.+?\.php)' # match without quotes
            #regex_php.append(regex_php3)
            phps = {} # accumulate across all patterns
            for regphp in regex_php:
                pattern_php = re.compile(regphp)
                php_links = re.findall(pattern_php, target_reply)
                for php in php_links:
                    try:
                        if php.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + php, None, headers)
                                php_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Webpage (.php) found: ' + target_url + php.split('"')[0])
                                size = len(php_file)
                                phps[php] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_php += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
            if phps:
                biggest_php = max(phps, key=phps.get) # extract biggest .php from dict
                biggest_files[biggest_php] = phps[biggest_php] # add biggest .php to candidates
        except: # if no .php found, go for the next object type
            pass
        try: # search for .html files
            regex_html = []
            regex_html1 = r"href='(.+?\.html)'" # match with single quotes
            regex_html.append(regex_html1)
            regex_html2 = r'href="(.+?\.html)"' # match with double quotes
            regex_html.append(regex_html2)
            #regex_html3 = r'href=(.+?\.html)' # match without quotes
            #regex_html.append(regex_html3)
            htmls = {} # accumulate across all patterns
            for reghtml in regex_html:
                pattern_html = re.compile(reghtml)
                html_links = re.findall(pattern_html, target_reply)
                for html in html_links:
                    try:
                        if html.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + html, None, headers)
                                html_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Webpage (.html) found: ' + target_url + html.split('"')[0])
                                size = len(html_file)
                                htmls[html] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_html += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
            if htmls:
                biggest_html = max(htmls, key=htmls.get) # extract biggest .html from dict
                biggest_files[biggest_html] = htmls[biggest_html] # add biggest .html to candidates
        except: # if no .html found, go for the next object type
            pass
        try: # search for .jsp files
            regex_jsp = []
            regex_jsp1 = r"href='(.+?\.jsp)'" # match with single quotes
            regex_jsp.append(regex_jsp1)
            regex_jsp2 = r'href="(.+?\.jsp)"' # match with double quotes
            regex_jsp.append(regex_jsp2)
            #regex_jsp3 = r'href=(.+?\.jsp)' # match without quotes
            #regex_jsp.append(regex_jsp3)
            jsps = {} # accumulate across all patterns
            for regjsp in regex_jsp:
                pattern_jsp = re.compile(regjsp)
                jsp_links = re.findall(pattern_jsp, target_reply)
                for jsp in jsp_links:
                    try:
                        if jsp.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + jsp, None, headers)
                                jsp_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Webpage (.jsp) found: ' + target_url + jsp.split('"')[0])
                                size = len(jsp_file)
                                jsps[jsp] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_jsp += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
            if jsps:
                biggest_jsp = max(jsps, key=jsps.get) # extract biggest .jsp from dict
                biggest_files[biggest_jsp] = jsps[biggest_jsp] # add biggest .jsp to candidates
        except: # if no .jsp found, go for the next object type
            pass
        try: # search for .asp files
            regex_asp = []
            regex_asp1 = r"href='(.+?\.asp)'" # match with single quotes
            regex_asp.append(regex_asp1)
            regex_asp2 = r'href="(.+?\.asp)"' # match with double quotes
            regex_asp.append(regex_asp2)
            #regex_asp3 = r'href=(.+?\.asp)' # match without quotes
            #regex_asp.append(regex_asp3)
            asps = {} # accumulate across all patterns
            for regasp in regex_asp:
                pattern_asp = re.compile(regasp)
                asp_links = re.findall(pattern_asp, target_reply)
                for asp in asp_links:
                    try:
                        if asp.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + asp, None, headers)
                                asp_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+Webpage (.asp) found: ' + target_url + asp.split('"')[0])
                                size = len(asp_file)
                                asps[asp] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_asp += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Webpage -> [Discarding!]')
                        size = 0
            if asps:
                biggest_asp = max(asps, key=asps.get) # extract biggest .asp from dict
                biggest_files[biggest_asp] = asps[biggest_asp] # add biggest .asp to candidates
        except: # if no .asp found, go for the next object type
            pass
        try: # search for .txt files
            regex_txt = []
            regex_txt1 = r"href='(.+?\.txt)'" # match with single quotes
            regex_txt.append(regex_txt1)
            regex_txt2 = r'href="(.+?\.txt)"' # match with double quotes
            regex_txt.append(regex_txt2)
            #regex_txt3 = r'href=(.+?\.txt)' # match without quotes
            #regex_txt.append(regex_txt3)
            txts = {} # accumulate across all patterns
            for regtxt in regex_txt:
                pattern_txt = re.compile(regtxt)
                txt_links = re.findall(pattern_txt, target_reply)
                for txt in txt_links:
                    try:
                        if txt.startswith('http'): # discard absolute links
                            size = 0
                        else:
                            target_host = urlparse(target)
                            target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
                            if not target_url.endswith('/'): # add "/" to end of target
                                target_url = target_url + "/"
                            try:
                                if self.ufonet.options.proxy: # set proxy
                                    self.proxy_transport(self.ufonet.options.proxy)
                                req = urllib.request.Request(target_url + txt, None, headers)
                                txt_file = urllib.request.urlopen(req, context=self.ctx).read()
                                print('+File (.txt) found: ' + target_url + txt.split('"')[0])
                                size = len(txt_file)
                                txts[txt] = size
                                print('(Size: ' + str(size) + ' Bytes)')
                                self.c_txt += 1
                                print('-'*12)
                            except:
                                size = 0
                    except:
                        print('[Error] [AI] Unable to retrieve info from Text file -> [Discarding!]')
                        size = 0
            if txts: # was "biggest_text" (NameError) in a previous revision
                biggest_txt = max(txts, key=txts.get) # extract biggest .txt from dict
                biggest_files[biggest_txt] = txts[biggest_txt] # add biggest .txt to candidates
        except: # if no .txt found, go for the next object type
            pass
        print("\n" + '='*80)
        total_objects = self.c_images + self.c_mov + self.c_webm + self.c_avi + self.c_swf + self.c_mpg + self.c_mpeg + self.c_mp3 + self.c_mp4 + self.c_ogg + self.c_ogv + self.c_wmv + self.c_css + self.c_js + self.c_xml + self.c_php + self.c_html + self.c_jsp + self.c_asp + self.c_txt
        print("Total objects found:", total_objects)
        print('-'*20)
        print("images:", self.c_images)
        print(".mov  :", self.c_mov)
        print(".webm :", self.c_webm)
        print(".avi  :", self.c_avi)
        print(".swf  :", self.c_swf)
        print(".mpg  :", self.c_mpg)
        print(".mpeg :", self.c_mpeg)
        print(".mp3  :", self.c_mp3)
        print(".mp4  :", self.c_mp4)
        print(".ogg  :", self.c_ogg)
        print(".ogv  :", self.c_ogv)
        print(".wmv  :", self.c_wmv)
        print(".css  :", self.c_css)
        print(".js   :", self.c_js)
        print(".xml  :", self.c_xml)
        print(".php  :", self.c_php)
        print(".html :", self.c_html)
        print(".jsp  :", self.c_jsp)
        print(".asp  :", self.c_asp)
        print(".txt  :", self.c_txt)
        print('-'*20)
        print('='*80)
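        # Only `biggest_files` feeds the selection below: the largest object
        # found becomes the candidate, preferring on-target links and
        # dismissing external ones.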
        if not biggest_files:
            print("\n[Info] [AI] No links found on target! -> [Exiting!]\n\n")
            print('='*80 + '\n')
            return
        biggest_file_on_target = max(biggest_files, key=biggest_files.get) # extract biggest file from dict
        target_host = urlparse(target)
        target_url = target_host.scheme + "://" + target_host.netloc + target_host.path
        if biggest_file_on_target.startswith('http'): # used for absolute links
            for url, size in list(biggest_files.items()): # review all dict values
                if url.startswith('http'):
                    if not target_url in url: # dismiss external links
                        del biggest_files[url] # remove value from dict
            if not biggest_files: # every absolute link pointed off-target
                print("\n[Info] [AI] No links found on target! -> [Exiting!]\n\n")
                print('='*80 + '\n')
                return
            biggest_file_on_target = max(biggest_files, key=biggest_files.get) # extract new value
            print('=Biggest File: ' + biggest_file_on_target)
        else: # used for relative links
            if not target_url.endswith('/'): # add "/" to end of target
                target_url = target_url + "/"
            print('=Biggest File: ' + target_url + biggest_file_on_target)
        print('='*80 + '\n')
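
# A minimal, hypothetical usage sketch (not part of UFONet's normal entry
# points): Inspector only needs a few attributes from the object it receives
# (`agents`, `referer`, `options.proxy`, plus an `extract_proxy()` helper when
# a proxy is configured), so a small stub is enough to drive inspecting() by
# hand. Running it performs live GET requests against the given target;
# 'http://target.com/' below is just a placeholder.
if __name__ == "__main__":
    class _Options(object):
        proxy = None # e.g. "http://127.0.0.1:8118" to route through a proxy

    class _FakeUFONet(object): # hypothetical stand-in for the UFONet core object
        agents = ['Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0']
        referer = 'http://127.0.0.1/'
        options = _Options()
        def extract_proxy(self, proxy): # only called when options.proxy is set
            return proxy

    Inspector(_FakeUFONet()).inspecting('http://target.com/')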