ElggBatch.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. <?php
  2. /**
  3. * Efficiently run operations on batches of results for any function
  4. * that supports an options array.
  5. *
  6. * This is usually used with elgg_get_entities() and friends,
  7. * elgg_get_annotations(), and elgg_get_metadata().
  8. *
  9. * If you pass a valid PHP callback, all results will be run through that
  10. * callback. You can still foreach() through the result set after. Valid
  11. * PHP callbacks can be a string, an array, or a closure.
  12. * {@link http://php.net/manual/en/language.pseudo-types.php}
  13. *
  14. * The callback function must accept 3 arguments: an entity, the getter
  15. * used, and the options used.
  16. *
  17. * Results from the callback are stored in callbackResult. If the callback
  18. * returns only booleans, callbackResult will be the combined result of
  19. * all calls. If no entities are processed, callbackResult will be null.
  20. *
  21. * If the callback returns anything else, callbackResult will be an indexed
  22. * array of whatever the callback returns. If returning error handling
  23. * information, you should include enough information to determine which
  24. * result you're referring to.
  25. *
  26. * Don't combine returning bools and returning something else.
  27. *
  28. * Note that returning false will not stop the foreach.
  29. *
  30. * @warning If your callback or foreach loop deletes or disables entities
  31. * you MUST call setIncrementOffset(false) or set that when instantiating.
  32. * This forces the offset to stay what it was in the $options array.
  33. *
  34. * @example
  35. * <code>
  36. * // using foreach
  37. * $batch = new \ElggBatch('elgg_get_entities', array());
  38. * $batch->setIncrementOffset(false);
  39. *
  40. * foreach ($batch as $entity) {
  41. * $entity->disable();
  42. * }
  43. *
  44. * // using a callback
  45. * $callback = function($result, $getter, $options) {
  46. * var_dump("Looking at annotation id: $result->id");
  47. * return true;
  48. * }
  49. *
  50. * $batch = new \ElggBatch('elgg_get_annotations', array('guid' => 2), $callback);
  51. * </code>
  52. *
  53. * @package Elgg.Core
  54. * @subpackage DataModel
  55. * @since 1.8
  56. */
  class ElggBatch implements \Iterator {

      /**
       * The objects to iterate over (the current chunk).
       *
       * @var array
       */
      private $results = array();

      /**
       * The function used to get results.
       *
       * @var mixed A string, array, closure, or lambda function
       */
      private $getter = null;

      /**
       * The options array handed to the getter on every fetch. The 'limit',
       * 'offset' and '__ElggBatch' keys are overridden for each chunk.
       *
       * Declared public only because it used to be created dynamically in the
       * constructor and was therefore publicly visible; treat it as internal.
       *
       * @var array
       * @access private
       */
      public $options = array();

      /**
       * The number of results to grab at a time.
       *
       * @var int
       */
      private $chunkSize = 25;

      /**
       * A callback function to pass results through.
       *
       * @var mixed A string, array, closure, or lambda function
       */
      private $callback = null;

      /**
       * Start after this many results.
       *
       * @var int
       */
      private $offset = 0;

      /**
       * Stop after this many results (0 means no limit).
       *
       * @var int
       */
      private $limit = 0;

      /**
       * Number of rows retrieved so far, including incomplete ones.
       *
       * @var int
       */
      private $retrievedResults = 0;

      /**
       * The index of the current result within the current chunk
       *
       * @var int
       */
      private $resultIndex = 0;

      /**
       * The index of the current chunk
       *
       * @var int
       */
      private $chunkIndex = 0;

      /**
       * The number of results iterated through
       *
       * @var int
       */
      private $processedResults = 0;

      /**
       * Is the getter a valid callback (null until first checked)
       *
       * @var bool
       */
      private $validGetter = null;

      /**
       * The result of running all entities through the callback function.
       *
       * @var mixed
       */
      public $callbackResult = null;

      /**
       * If false, offset will not be incremented. This is used for callbacks/loops that delete.
       *
       * @var bool
       */
      private $incrementOffset = true;

      /**
       * Entities that could not be instantiated during the latest fetch
       *
       * @var \stdClass[]
       */
      private $incompleteEntities = array();

      /**
       * Total number of incomplete entities fetched
       *
       * @var int
       */
      private $totalIncompletes = 0;

      /**
       * Batches operations on any elgg_get_*() or compatible function that supports
       * an options array.
       *
       * Instead of returning all objects in memory, it goes through $chunk_size
       * objects, then requests more from the server. This avoids OOM errors.
       *
       * @param string $getter     The function used to get objects. Usually
       *                           an elgg_get_*() function, but can be any valid PHP callback.
       * @param array  $options    The options array to pass to the getter function. If limit is
       *                           not set, the 'default_limit' config value is used. In most
       *                           cases that is not what you want; pass limit = 0 for everything.
       * @param mixed  $callback   An optional callback function that all results will be passed
       *                           to upon load. The callback needs to accept $result, $getter,
       *                           $options.
       * @param int    $chunk_size The number of entities to pull in before requesting more.
       *                           You have to balance this between running out of memory in PHP
       *                           and hitting the db server too often. Values <= 0 fall back to 25.
       * @param bool   $inc_offset Increment the offset on each fetch. This must be false for
       *                           callbacks that delete rows. You can set this after the
       *                           object is created with {@link \ElggBatch::setIncrementOffset()}.
       */
      public function __construct($getter, $options, $callback = null, $chunk_size = 25,
              $inc_offset = true) {

          $this->getter = $getter;
          $this->options = $options;
          $this->callback = $callback;
          $this->chunkSize = $chunk_size;
          $this->setIncrementOffset($inc_offset);

          if ($this->chunkSize <= 0) {
              $this->chunkSize = 25;
          }

          // store these so we can compare later
          $this->offset = elgg_extract('offset', $options, 0);
          $this->limit = elgg_extract('limit', $options, elgg_get_config('default_limit'));

          // if passed a callback, create a new \ElggBatch with the same options
          // and pass each result to the callback. This instance stays untouched,
          // so it can still be iterated afterwards.
          if ($callback && is_callable($callback)) {
              $batch = new \ElggBatch($getter, $options, null, $chunk_size, $inc_offset);

              $all_results = null;

              foreach ($batch as $result) {
                  $result = call_user_func($callback, $result, $getter, $options);
                  $is_flag = ($result === true || $result === false || $result === null);

                  // the first return value decides between "combined bool" and "list" mode
                  if (!isset($all_results)) {
                      $all_results = $is_flag ? $result : array();
                  }

                  if ($is_flag && !is_array($all_results)) {
                      $all_results = $result && $all_results;
                  } else {
                      $all_results[] = $result;
                  }
              }

              $this->callbackResult = $all_results;
          }
      }

      /**
       * Tell the process that an entity was incomplete during a fetch
       *
       * @param \stdClass $row The row that could not be turned into an entity
       *
       * @return void
       * @access private
       */
      public function reportIncompleteEntity(\stdClass $row) {
          $this->incompleteEntities[] = $row;
      }

      /**
       * Fetches the next chunk of results
       *
       * Keeps fetching while a chunk consists solely of incomplete rows, so that
       * on success there is at least one row to iterate over.
       *
       * @return bool True if a chunk with at least one usable result was loaded
       */
      private function getNextResultsChunk() {
          if (!isset($this->validGetter)) {
              $this->validGetter = is_callable($this->getter);
          }

          if (!$this->validGetter) {
              $this->results = array();
              return false;
          }

          do {
              // always reset results.
              $this->results = array();

              $limit = $this->chunkSize;

              // if someone passed limit = 0 they want everything.
              if ($this->limit != 0) {
                  $remaining = $this->limit - $this->retrievedResults;
                  if ($remaining <= 0) {
                      return false;
                  }

                  // never ask for more rows than are left in the original limit
                  $limit = min($limit, $remaining);
              }

              if ($this->incrementOffset) {
                  $offset = $this->offset + $this->retrievedResults;
              } else {
                  // rows are disappearing as we go; only step over the ones that
                  // could not be loaded (and therefore could not be removed)
                  $offset = $this->offset + $this->totalIncompletes;
              }

              $options = array_merge($this->options, array(
                  'limit' => $limit,
                  'offset' => $offset,
                  '__ElggBatch' => $this,
              ));

              // the getter reports incompletes through reportIncompleteEntity()
              $this->incompleteEntities = array();
              $results = $this->fetchWithoutQueryCache($options);

              $num_results = count($results);
              $num_incomplete = count($this->incompleteEntities);
              $this->totalIncompletes += $num_incomplete;

              if ($num_results === 0 && $num_incomplete === 0) {
                  // nothing left to fetch
                  return false;
              }

              if ($num_incomplete > 0) {
                  // pad the front of the results with nulls representing the incompletes
                  array_splice($results, 0, 0, array_fill(0, $num_incomplete, null));
              }

              $this->results = $results;

              // ...and skip past the nulls
              reset($this->results);
              for ($i = 0; $i < $num_incomplete; $i++) {
                  next($this->results);
              }

              $this->chunkIndex++;

              // let the system know we've jumped past the nulls
              $this->resultIndex = $num_incomplete;
              $this->retrievedResults += ($num_results + $num_incomplete);

              // If this fetch was *all* incompletes we need to fetch until we can
              // either offer at least one row to iterate over, or give up.
          } while ($num_results === 0);

          return true;
      }

      /**
       * Runs the getter with the DB query cache switched off
       *
       * Batch result sets tend to be large; we don't want to cache these. The
       * cache is switched back on afterwards, also when the getter throws.
       *
       * @param array $options Options for this chunk
       *
       * @return array The getter's results; a non-array return value is treated
       *               as an empty chunk
       */
      private function fetchWithoutQueryCache(array $options) {
          $db = _elgg_services()->db;

          $db->disableQueryCache();
          try {
              $results = call_user_func($this->getter, $options);
          } catch (\Exception $e) {
              // catch/rethrow instead of finally to stay compatible with PHP < 5.5
              $db->enableQueryCache();
              throw $e;
          }
          $db->enableQueryCache();

          return is_array($results) ? $results : array();
      }

      /**
       * Increment the offset from the original options array? Setting to
       * false is required for callbacks that delete rows.
       *
       * @param bool $increment Set to false when deleting data
       * @return void
       */
      public function setIncrementOffset($increment = true) {
          $this->incrementOffset = (bool) $increment;
      }

      /**
       * Implements Iterator
       */

      /**
       * PHP Iterator Interface
       *
       * Restarts the batch from the original offset. The first chunk is always
       * fetched again so that the batch can be iterated more than once.
       *
       * @see Iterator::rewind()
       * @return void
       */
      #[\ReturnTypeWillChange]
      public function rewind() {
          $this->resultIndex = 0;
          $this->retrievedResults = 0;
          $this->processedResults = 0;
          $this->totalIncompletes = 0;
          $this->chunkIndex = 0;

          $this->getNextResultsChunk();
      }

      /**
       * PHP Iterator Interface
       *
       * @see Iterator::current()
       * @return mixed
       */
      #[\ReturnTypeWillChange]
      public function current() {
          return current($this->results);
      }

      /**
       * PHP Iterator Interface
       *
       * @see Iterator::key()
       * @return int
       */
      #[\ReturnTypeWillChange]
      public function key() {
          return $this->processedResults;
      }

      /**
       * PHP Iterator Interface
       *
       * @see Iterator::next()
       * @return mixed The new current result, or false when there are no more
       */
      #[\ReturnTypeWillChange]
      public function next() {
          // if we'll be at the end.
          if ($this->limit > 0 && ($this->processedResults + 1) >= $this->limit) {
              $this->results = array();
              return false;
          }

          // if we'll need new results.
          if (($this->resultIndex + 1) >= $this->chunkSize) {
              if (!$this->getNextResultsChunk()) {
                  $this->results = array();
                  return false;
              }

              // the fetch positioned the array pointer and reset resultIndex
              $result = current($this->results);
          } else {
              $this->resultIndex++;
              $result = next($this->results);
          }

          $this->processedResults++;
          return $result;
      }

      /**
       * PHP Iterator Interface
       *
       * @see Iterator::valid()
       * @return bool
       */
      #[\ReturnTypeWillChange]
      public function valid() {
          if (!is_array($this->results)) {
              return false;
          }

          // key() is null once the array pointer has moved past the last element
          $key = key($this->results);
          return ($key !== null && $key !== false);
      }
  }