dork.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # vim: set expandtab tabstop=4 shiftwidth=4:
  4. """
  5. This file is part of the xsser project, https://xsser.03c8.net
  6. Copyright (c) 2011/2016/2018 psy <epsylon@riseup.net>
  7. xsser is free software; you can redistribute it and/or modify it under
  8. the terms of the GNU General Public License as published by the Free
  9. Software Foundation version 3 of the License.
  10. xsser is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  13. details.
  14. You should have received a copy of the GNU General Public License along
  15. with xsser; if not, write to the Free Software Foundation, Inc., 51
  16. Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. ........
  18. List of search engines: http://en.wikipedia.org/wiki/List_of_search_engines
  19. """
  20. import urllib2, traceback, re, random
  21. urllib2.socket.setdefaulttimeout(5.0)
  22. DEBUG = 0
  23. class Dorker(object):
  24. def __init__(self, engine='yahoo'):
  25. self._engine = engine
  26. self.search_engines = [] # available dorking search engines
  27. self.search_engines.append('bing')
  28. self.search_engines.append('yahoo')
  29. self.agents = [] # user-agents
  30. try:
  31. f = open("core/fuzzing/user-agents.txt").readlines() # set path for user-agents
  32. except:
  33. f = open("fuzzing/user-agents.txt").readlines() # set path for user-agents when testing
  34. for line in f:
  35. self.agents.append(line)
  36. def dork(self, search):
  37. """
  38. Perform a search and return links.
  39. """
  40. if self._engine == 'bing': # works at 20-02-2011 -> 19-02-2016 -> 09-04-2018
  41. search_url = 'https://www.bing.com/search?q="' + search + '"'
  42. elif self._engine == 'yahoo': # works at 20-02-2011 -> 19-02-2016 -> -> 09-04-2018
  43. search_url = 'https://search.yahoo.com/search?q="' + search + '"'
  44. else:
  45. print "\n[Error] This search engine is not supported!\n"
  46. print "[Info] List of available:"
  47. print '-'*25
  48. for e in self.search_engines:
  49. print "+ "+e
  50. print ""
  51. try:
  52. self.search_url = search_url
  53. print "\n[Info] Search query:", urllib2.unquote(search_url)
  54. user_agent = random.choice(self.agents).strip() # set random user-agent
  55. referer = '127.0.0.1' # set referer to localhost / WAF black magic!
  56. headers = {'User-Agent' : user_agent, 'Referer' : referer}
  57. req = urllib2.Request(search_url, None, headers)
  58. html_data = urllib2.urlopen(req).read()
  59. print "\n[Info] Retrieving requested info..."
  60. except urllib2.URLError, e:
  61. if DEBUG:
  62. traceback.print_exc()
  63. print "\n[Error] Cannot connect!"
  64. return
  65. if self._engine == 'bing':
  66. regex = '<h2><a href="(.+?)" h=' # regex magics 09-04/2018
  67. if self._engine == 'yahoo':
  68. regex = 'RU=(.+?)/RK=' # regex magics [09/04/2018]
  69. pattern = re.compile(regex)
  70. links = re.findall(pattern, html_data)
  71. found_links = []
  72. if links:
  73. for link in links:
  74. link = urllib2.unquote(link)
  75. if self._engine == "yahoo":
  76. if "RU=https://www.yahoo.com/" in link:
  77. link = "" # invalid url
  78. if search.upper() in link.upper(): # parse that search query is on url
  79. sep = search
  80. link2 = link.split(sep,1)[0]
  81. if link2 not in found_links: # parse that target is not duplicated
  82. found_links.append(link)
  83. else:
  84. print "\n[Info] Not any link found for that query!"
  85. return found_links
  86. if __name__ == '__main__':
  87. for a in ['yahoo', 'bing']:
  88. dork = Dorker(a)
  89. res = dork.dork("news.php?id=")
  90. if res:
  91. print "[+]", a, ":", len(res), "\n"
  92. for b in res:
  93. print " *", b