threadpool.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # vim: set expandtab tabstop=4 shiftwidth=4:
  4. """
  5. This file is part of the XSSer project, https://xsser.03c8.net
  6. Copyright (c) 2010/2019 | psy <epsylon@riseup.net>
  7. xsser is free software; you can redistribute it and/or modify it under
  8. the terms of the GNU General Public License as published by the Free
  9. Software Foundation version 3 of the License.
  10. xsser is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  13. details.
  14. You should have received a copy of the GNU General Public License along
  15. with xsser; if not, write to the Free Software Foundation, Inc., 51
  16. Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. """
  18. """Easy to use object-oriented thread pool framework.
  19. A thread pool is an object that maintains a pool of worker threads to perform
  20. time consuming operations in parallel. It assigns jobs to the threads
  21. by putting them in a work request queue, where they are picked up by the
  22. next available thread. This then performs the requested operation in the
  23. background and puts the results in another queue.
  24. The thread pool object can then collect the results from all threads from
  25. this queue as soon as they become available or after all threads have
  26. finished their work. It's also possible to define callbacks to handle
  27. each result as it comes in.
  28. The basic concept and some code was taken from the book "Python in a Nutshell,
  29. 2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section
  30. 14.5 "Threaded Program Architecture". I wrapped the main program logic in the
  31. ThreadPool class, added the WorkRequest class and the callback system and
  32. tweaked the code here and there. Kudos also to Florent Aide for the exception
  33. handling mechanism.
  34. Basic usage::
  35. >>> pool = ThreadPool(poolsize)
  36. >>> requests = makeRequests(some_callable, list_of_args, callback)
  37. >>> [pool.putRequest(req) for req in requests]
  38. >>> pool.wait()
  39. See the end of the module code for a brief, annotated usage example.
  40. Website : http://chrisarndt.de/projects/threadpool/
  41. """
  42. __docformat__ = "restructuredtext en"
  43. __all__ = [
  44. 'makeRequests',
  45. 'NoResultsPending',
  46. 'NoWorkersAvailable',
  47. 'ThreadPool',
  48. 'WorkRequest',
  49. 'WorkerThread'
  50. ]
  51. __author__ = "Christopher Arndt"
  52. __version__ = '1.2.7'
  53. __revision__ = "$Revision: 416 $"
  54. __date__ = "$Date: 2009-10-07 05:41:27 +0200 (Wed, 07 Oct 2009) $"
  55. __license__ = "MIT license"
  56. # standard library modules
  57. import sys
  58. import threading
  59. try:
  60. import Queue
  61. except ImportError:
  62. import queue as Queue
  63. from Queue import Empty
  64. import traceback
  65. # exceptions
  66. class NoResultsPending(Exception):
  67. """All work requests have been processed."""
  68. pass
  69. class NoWorkersAvailable(Exception):
  70. """No worker threads available to process remaining requests."""
  71. pass
  72. # internal module helper functions
  73. def _handle_thread_exception(request, exc_info):
  74. """Default exception handler callback function.
  75. This just prints the exception info via ``traceback.print_exception``.
  76. """
  77. traceback.print_exception(*exc_info)
  78. # utility functions
  79. def makeRequests(callable_, args_list, callback=None,
  80. exc_callback=_handle_thread_exception):
  81. """Create several work requests for same callable with different arguments.
  82. Convenience function for creating several work requests for the same
  83. callable where each invocation of the callable receives different values
  84. for its arguments.
  85. ``args_list`` contains the parameters for each invocation of callable.
  86. Each item in ``args_list`` should be either a 2-item tuple of the list of
  87. positional arguments and a dictionary of keyword arguments or a single,
  88. non-tuple argument.
  89. See docstring for ``WorkRequest`` for info on ``callback`` and
  90. ``exc_callback``.
  91. """
  92. requests = []
  93. for item in args_list:
  94. is_crawling = False
  95. try:
  96. psy = item[3] # black magic!
  97. is_crawling = True
  98. except:
  99. is_crawling = False
  100. if isinstance(item, tuple):
  101. requests.append(
  102. WorkRequest(callable_, item[0], item[1], callback=callback,
  103. exc_callback=exc_callback)
  104. )
  105. else:
  106. if is_crawling == True:
  107. requests.append(
  108. WorkRequest(callable_, [item], None, callback=callback,
  109. exc_callback=exc_callback)
  110. )
  111. else:
  112. requests.append(
  113. WorkRequest(callable_, item, None, callback=callback,
  114. exc_callback=exc_callback)
  115. )
  116. return requests
  117. # classes
  118. class WorkerThread(threading.Thread):
  119. """Background thread connected to the requests/results queues.
  120. A worker thread sits in the background and picks up work requests from
  121. one queue and puts the results in another until it is dismissed.
  122. """
  123. def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
  124. """Set up thread in daemonic mode and start it immediatedly.
  125. ``requests_queue`` and ``results_queue`` are instances of
  126. ``Queue.Queue`` passed by the ``ThreadPool`` class when it creates a new
  127. worker thread.
  128. """
  129. threading.Thread.__init__(self, **kwds)
  130. self.setDaemon(1)
  131. self._requests_queue = requests_queue
  132. self._results_queue = results_queue
  133. self._poll_timeout = poll_timeout
  134. self._dismissed = threading.Event()
  135. self.start()
  136. def run(self):
  137. """Repeatedly process the job queue until told to exit."""
  138. while True:
  139. if self._dismissed.isSet():
  140. # we are dismissed, break out of loop
  141. break
  142. # get next work request. If we don't get a new request from the
  143. # queue after self._poll_timout seconds, we jump to the start of
  144. # the while loop again, to give the thread a chance to exit.
  145. try:
  146. request = self._requests_queue.get(True, self._poll_timeout)
  147. except Empty:
  148. continue
  149. else:
  150. if self._dismissed.isSet():
  151. # we are dismissed, put back request in queue and exit loop
  152. self._requests_queue.put(request)
  153. break
  154. try:
  155. result = request.callable(*request.args, **request.kwds)
  156. self._results_queue.put((request, result))
  157. except:
  158. request.exception = True
  159. self._results_queue.put((request, sys.exc_info()))
  160. def dismiss(self):
  161. """Sets a flag to tell the thread to exit when done with current job."""
  162. self._dismissed.set()
  163. class WorkRequest:
  164. """A request to execute a callable for putting in the request queue later.
  165. See the module function ``makeRequests`` for the common case
  166. where you want to build several ``WorkRequest`` objects for the same
  167. callable but with different arguments for each call.
  168. """
  169. def __init__(self, callable_, args=None, kwds=None, requestID=None,
  170. callback=None, exc_callback=_handle_thread_exception):
  171. """Create a work request for a callable and attach callbacks.
  172. A work request consists of the a callable to be executed by a
  173. worker thread, a list of positional arguments, a dictionary
  174. of keyword arguments.
  175. A ``callback`` function can be specified, that is called when the
  176. results of the request are picked up from the result queue. It must
  177. accept two anonymous arguments, the ``WorkRequest`` object and the
  178. results of the callable, in that order. If you want to pass additional
  179. information to the callback, just stick it on the request object.
  180. You can also give custom callback for when an exception occurs with
  181. the ``exc_callback`` keyword parameter. It should also accept two
  182. anonymous arguments, the ``WorkRequest`` and a tuple with the exception
  183. details as returned by ``sys.exc_info()``. The default implementation
  184. of this callback just prints the exception info via
  185. ``traceback.print_exception``. If you want no exception handler
  186. callback, just pass in ``None``.
  187. ``requestID``, if given, must be hashable since it is used by
  188. ``ThreadPool`` object to store the results of that work request in a
  189. dictionary. It defaults to the return value of ``id(self)``.
  190. """
  191. if requestID is None:
  192. self.requestID = id(self)
  193. else:
  194. try:
  195. self.requestID = hash(requestID)
  196. except TypeError:
  197. raise TypeError("requestID must be hashable.")
  198. self.exception = False
  199. self.callback = callback
  200. self.exc_callback = exc_callback
  201. self.callable = callable_
  202. self.args = args or []
  203. self.kwds = kwds or {}
  204. def __str__(self):
  205. return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % \
  206. (self.requestID, self.args, self.kwds, self.exception)
  207. class ThreadPool:
  208. """A thread pool, distributing work requests and collecting results.
  209. See the module docstring for more information.
  210. """
  211. def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
  212. """Set up the thread pool and start num_workers worker threads.
  213. ``num_workers`` is the number of worker threads to start initially.
  214. If ``q_size > 0`` the size of the work *request queue* is limited and
  215. the thread pool blocks when the queue is full and it tries to put
  216. more work requests in it (see ``putRequest`` method), unless you also
  217. use a positive ``timeout`` value for ``putRequest``.
  218. If ``resq_size > 0`` the size of the *results queue* is limited and the
  219. worker threads will block when the queue is full and they try to put
  220. new results in it.
  221. .. warning:
  222. If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
  223. the possibilty of a deadlock, when the results queue is not pulled
  224. regularly and too many jobs are put in the work requests queue.
  225. To prevent this, always set ``timeout > 0`` when calling
  226. ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.
  227. """
  228. self._requests_queue = Queue.Queue(q_size)
  229. self._results_queue = Queue.Queue(resq_size)
  230. self.workers = []
  231. self.dismissedWorkers = []
  232. self.workRequests = {}
  233. self.createWorkers(num_workers, poll_timeout)
  234. def createWorkers(self, num_workers, poll_timeout=5):
  235. """Add num_workers worker threads to the pool.
  236. ``poll_timout`` sets the interval in seconds (int or float) for how
  237. ofte threads should check whether they are dismissed, while waiting for
  238. requests.
  239. """
  240. for i in range(num_workers):
  241. self.workers.append(WorkerThread(self._requests_queue,
  242. self._results_queue, poll_timeout=poll_timeout))
  243. def dismissWorkers(self, num_workers, do_join=False):
  244. """Tell num_workers worker threads to quit after their current task."""
  245. dismiss_list = []
  246. for i in range(min(num_workers, len(self.workers))):
  247. worker = self.workers.pop()
  248. worker.dismiss()
  249. dismiss_list.append(worker)
  250. if do_join:
  251. for worker in dismiss_list:
  252. worker.join()
  253. else:
  254. self.dismissedWorkers.extend(dismiss_list)
  255. def joinAllDismissedWorkers(self):
  256. """Perform Thread.join() on all worker threads that have been dismissed.
  257. """
  258. for worker in self.dismissedWorkers:
  259. worker.join()
  260. self.dismissedWorkers = []
  261. def putRequest(self, request, block=True, timeout=None):
  262. """Put work request into work queue and save its id for later."""
  263. assert isinstance(request, WorkRequest)
  264. # don't reuse old work requests
  265. assert not getattr(request, 'exception', None)
  266. self._requests_queue.put(request, block, timeout)
  267. self.workRequests[request.requestID] = request
  268. def addRequest(self, do_cb, data, print_cb, exception_cb, block=True,
  269. timeout=None):
  270. """Put work request into work queue and save its id for later."""
  271. requests = makeRequests(do_cb, data, print_cb, exception_cb)
  272. for req in requests:
  273. self.putRequest(req, block, timeout)
  274. def poll(self, block=False):
  275. """Process any new results in the queue."""
  276. while True:
  277. # still results pending?
  278. if not self.workRequests:
  279. raise NoResultsPending
  280. # are there still workers to process remaining requests?
  281. elif block and not self.workers:
  282. raise NoWorkersAvailable
  283. try:
  284. # get back next results
  285. request, result = self._results_queue.get(block=block)
  286. # has an exception occured?
  287. if request.exception and request.exc_callback:
  288. request.exc_callback(request, result)
  289. # hand results to callback, if any
  290. if request.callback and not \
  291. (request.exception and request.exc_callback):
  292. request.callback(request, result)
  293. del self.workRequests[request.requestID]
  294. except Empty:
  295. break
  296. def wait(self):
  297. """Wait for results, blocking until all have arrived."""
  298. while 1:
  299. try:
  300. self.poll(True)
  301. except NoResultsPending:
  302. break
  303. ################
  304. # USAGE EXAMPLE
  305. ################
  306. if __name__ == '__main__':
  307. import random
  308. import time
  309. # the work the threads will have to do (rather trivial in our example)
  310. def do_something(data):
  311. time.sleep(random.randint(1,5))
  312. result = round(random.random() * data, 5)
  313. # just to show off, we throw an exception once in a while
  314. if result > 5:
  315. raise RuntimeError("Something extraordinary happened!")
  316. return result
  317. # this will be called each time a result is available
  318. def print_result(request, result):
  319. print("**** Result from request #%s: %r" % (request.requestID, result))
  320. # this will be called when an exception occurs within a thread
  321. # this example exception handler does little more than the default handler
  322. def handle_exception(request, exc_info):
  323. if not isinstance(exc_info, tuple):
  324. # Something is seriously wrong...
  325. print(request)
  326. print(exc_info)
  327. raise SystemExit
  328. print("**** Exception occured in request #%s: %s" % \
  329. (request.requestID, exc_info))
  330. # assemble the arguments for each job to a list...
  331. data = [random.randint(1,10) for i in range(20)]
  332. # ... and build a WorkRequest object for each item in data
  333. requests = makeRequests(do_something, data, print_result, handle_exception)
  334. # to use the default exception handler, uncomment next line and comment out
  335. # the preceding one.
  336. #requests = makeRequests(do_something, data, print_result)
  337. # or the other form of args_lists accepted by makeRequests: ((,), {})
  338. data = [((random.randint(1,10),), {}) for i in range(20)]
  339. requests.extend(
  340. makeRequests(do_something, data, print_result, handle_exception)
  341. #makeRequests(do_something, data, print_result)
  342. # to use the default exception handler, uncomment next line and comment
  343. # out the preceding one.
  344. )
  345. # we create a pool of 3 worker threads
  346. print("Creating thread pool with 3 worker threads.")
  347. main = ThreadPool(3)
  348. # then we put the work requests in the queue...
  349. for req in requests:
  350. main.putRequest(req)
  351. print("Work request #%s added." % req.requestID)
  352. # or shorter:
  353. # [main.putRequest(req) for req in requests]
  354. # ...and wait for the results to arrive in the result queue
  355. # by using ThreadPool.wait(). This would block until results for
  356. # all work requests have arrived:
  357. # main.wait()
  358. # instead we can poll for results while doing something else:
  359. i = 0
  360. while True:
  361. try:
  362. time.sleep(0.5)
  363. main.poll()
  364. print("Main thread working...",)
  365. print("(active worker threads: %i)" % (threading.activeCount()-1, ))
  366. if i == 10:
  367. print("**** Adding 3 more worker threads...")
  368. main.createWorkers(3)
  369. if i == 20:
  370. print("**** Dismissing 2 worker threads...")
  371. main.dismissWorkers(2)
  372. i += 1
  373. except KeyboardInterrupt:
  374. print("**** Interrupted!")
  375. break
  376. except NoResultsPending:
  377. print("**** No pending results.")
  378. break
  379. if main.dismissedWorkers:
  380. print("Joining all dismissed worker threads...")
  381. main.joinAllDismissedWorkers()