file_takeout.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. <?php
  2. //
  3. // Define variables
  4. //
  global $CONFIG;

  // Temporary files created while building the archive; they are deleted once
  // the ZIP has been closed.
  $file_takeout_tmp_files = array();
  $site_url = elgg_get_site_url();
  $title = elgg_view_title("File Takeout");

  // The last segment of the request *path* selects the page mode: the literal
  // "file_takeout" shows the listing, anything else is treated as the GUID of
  // the container to export. Only the path component is inspected so that a
  // query string (e.g. "/file_takeout/42?view=default") cannot end up inside
  // the GUID. parse_url() may return false/null for malformed input, hence the
  // string cast before basename().
  $guid_from_path = basename((string) parse_url(
      isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '',
      PHP_URL_PATH
  ));
  $logged_in_user = elgg_get_logged_in_user_entity();
  11. //
  12. // Helper functions
  13. //
  14. // Everything needed to convert HTML files to DOCX
  15. require_once(elgg_get_plugins_path() . 'file_takeout/html2docx/phpword/PHPWord.php');
  16. require_once(elgg_get_plugins_path() . 'file_takeout/html2docx/simplehtmldom/simple_html_dom.php');
  17. require_once(elgg_get_plugins_path() . 'file_takeout/html2docx/htmltodocx_converter/h2d_htmlconverter.php');
  18. require_once(elgg_get_plugins_path() . 'file_takeout/html2docx/styles.inc');
  19. require_once(elgg_get_plugins_path() . 'file_takeout/html2docx/support_functions.inc');
  /**
   * Convert an HTML document into a Word 2007 (.docx) file.
   *
   * The markup is parsed with simple_html_dom and the nodes of the first child
   * of the <html> element are fed to htmltodocx_insert_html(), which fills a
   * single PHPWord section. The resulting document is saved to $file_path.
   *
   * @param string $html                   HTML to convert. It needs an <html> root with at
   *                                       least one child element, because that child's
   *                                       nodes are what get converted.
   * @param string $file_path              Destination path of the generated .docx file.
   * @param array  $file_takeout_tmp_files Passed by reference to the converter as
   *                                       'download_img_tmp' (presumably it records
   *                                       downloaded image files there -- confirm in
   *                                       h2d_htmlconverter.php).
   * @return void
   */
  function generate_docx($html, $file_path, &$file_takeout_tmp_files) {
      $word = new PHPWord();
      $target_section = $word->createSection();

      $parser = new simple_html_dom();
      $parser->load($html);
      $root_children = $parser->find('html', 0)->children();

      $base = htmltodocx_paths();

      // Converter state, assembled key by key in the order the converter has
      // always received it.
      $state = array();
      $state['phpword_object'] = &$word; // Must be passed by reference.
      $state['base_root'] = $base['base_root'];
      $state['base_path'] = $base['base_path'];
      $state['current_style'] = array('size' => '11'); // The PHPWord style on the top element.
      $state['parents'] = array(0 => 'body'); // Our parent is body.
      $state['list_depth'] = 0; // Depth of the list currently being rendered.
      $state['context'] = 'section'; // Possible values: section, footer or header.
      $state['pseudo_list'] = TRUE; // Word lists are not supported yet, TRUE is the only option.
      $state['pseudo_list_indicator_font_name'] = 'Wingdings'; // Bullet indicator font.
      $state['pseudo_list_indicator_font_size'] = '7'; // Bullet indicator size.
      $state['pseudo_list_indicator_character'] = 'l '; // Circle bullet point in Wingdings.
      $state['table_allowed'] = TRUE; // Set to FALSE when inserting into a PHPWord table: tables cannot be nested.
      $state['treat_div_as_paragraph'] = FALSE; // TRUE would start a new line for every div.
      $state['style_sheet'] = htmltodocx_styles(); // The "style sheet" array from styles.inc.
      // Added to fix images not showing up in converted docx files.
      $state['download_img_path'] = elgg_get_data_path();
      $state['download_img_tmp'] = &$file_takeout_tmp_files;

      htmltodocx_insert_html($target_section, $root_children[0]->nodes, $state);

      // Release the parsed DOM before writing the document.
      $parser->clear();
      unset($parser);

      // 'Word2007' is the only writer available.
      PHPWord_IOFactory::createWriter($word, 'Word2007')->save($file_path);
  }
  /**
   * Fetch the object entities of one subtype that live in a given container.
   *
   * Thin wrapper around elgg_get_entities() used for files, blogs, top-level
   * pages and bookmarks.
   *
   * @param int|string $guid    GUID used as 'container_guid'.
   * @param string     $subtype Object subtype (file, blog, page_top, bookmarks).
   * @return mixed Whatever elgg_get_entities() returns for these options.
   */
  function get_all_entities($guid, $subtype) {
      return elgg_get_entities(array(
          'type' => 'object',
          'subtype' => $subtype,
          'container_guid' => $guid,
          // Empty limit is passed through as-is (presumably "no limit" -- confirm
          // against elgg_get_entities()).
          'limit' => '',
      ));
  }
  /**
   * Turn an arbitrary title or upload name into a safe archive file name.
   *
   * - When the name contains more than one dot, the inner dots are dropped but
   *   the final extension is kept ("report.final.docx" -> "reportfinal.docx").
   *   Previously every dot was removed in that case, which also destroyed the
   *   extension.
   * - A fixed list of HTML entities, punctuation and path separators is removed.
   * - Surrounding whitespace is trimmed and inner whitespace runs become "_".
   *
   * @param string $filename Raw file name or title.
   * @return string Sanitized name; may be empty if nothing usable remains.
   */
  function sanitize_file_name($filename) {
      $parts = explode('.', $filename);
      if (count($parts) > 2) {
          // Collapse the inner dots but preserve the last segment as extension.
          $extension = array_pop($parts);
          $filename = implode('', $parts);
          if ($extension !== '') {
              $filename .= '.' . $extension;
          }
      }

      // Entities come first so they are removed as a whole before their
      // individual characters ("&", "#", ";") are stripped.
      $strip = array(
          '&#8216;', '&#8217;', '&#8220;', '&#8221;', '&#8211;', '&#8212;', '&#039;',
          '~', '`', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '=', '+',
          '[', '{', ']', '’', '}', '\\', '|', ';', ':', '"', "'", '—', '–',
          ',', '<', '>', '/', '?',
      );

      $clean = trim(str_replace($strip, '', $filename));
      return preg_replace('/\s+/', '_', $clean);
  }
  /**
   * Build the standalone HTML document exported for a single entity.
   *
   * @param object $entity      Entity to render; its title, description, owner_guid,
   *                            time_created and comments are read.
   * @param string $entity_type 'file', 'bookmarks' or any other type label.
   * @param string $export_type Export format: 'docx' omits the <head>/<style>
   *                            block, 'aspx' adds a "back" link.
   * @return string Complete HTML document, always closed with </body></html>.
   */
  function file_takeout_build_entry_html($entity, $entity_type, $export_type) {
      // The owner may no longer be loadable; fall back to an empty name.
      $owner = get_entity($entity->owner_guid);
      $author = $owner ? $owner->name : '';
      $pubdate = date('r', $entity->time_created);

      $description = $entity->description;
      if ($entity_type == 'bookmarks') {
          $description .= '<p>Address of bookmark: <a href="' . $entity->address . '">' . $entity->address . '</a></p>';
      } else if ($entity_type == 'file') {
          // Points at the copy stored under files/ using the same sanitized name.
          $stored_name = sanitize_file_name($entity->originalfilename);
          $description .= '<p>File: <a href="files/' . $stored_name . '">' . $stored_name . '</a></p>';
      }

      $html = '<html>';
      if ($export_type != 'docx') {
          $html .= implode("\n", array(
              '<head>',
              '<style>',
              'body {',
              "font-family: 'Segoe UI Light', 'Segoe UI', Segoe, Tahoma, Helvetica, Arial, sans-serif;",
              'font-size: 0.85em;',
              '}',
              '</style>',
              '</head>',
          ));
      }

      $html .= implode("\n", array(
          '<body>',
          '<div class="post">',
          '<h1>' . $entity->title . '</h1>',
          '<h2 class="post-meta">By ' . $author . ' on ' . $pubdate . '</h2>',
          $description,
      ));

      if ($entity->countComments() > 0) {
          $html .= '<div class="post-comments">' . "\n" . '<h2>Comments</h2>';
          $comments = $entity->getAnnotations('generic_comment');
          if (is_array($comments)) {
              foreach ($comments as $comment) {
                  $commenter = get_entity($comment->owner_guid);
                  $html .= implode("\n", array(
                      '<div class="post-comment">',
                      '<h3 class="post-comment-meta">By ' . ($commenter ? $commenter->name : '') . ' on ' . date('r', $comment->time_created) . '</h3>',
                      $comment->value,
                      '</div>',
                  ));
              }
          }
          $html .= '</div>';
      }
      $html .= '</div>';

      if ($export_type == 'aspx') {
          $html .= '<p><a href="javascript:history.back();">&lt; Back to file listing</a></p>' . "\n";
      }
      // Close the document for every export type. These tags used to be emitted
      // only for 'aspx', leaving html and docx exports unterminated.
      $html .= "</body>\n</html>";

      return $html;
  }

  /**
   * Add an RSS index and one document per entity to the ZIP archive.
   *
   * Writes "<entity_type>/entries.xml" (the RSS rendering of the container's
   * entities) and then one file per entity, named
   * "<Y-m-d>-<sanitized title>.<export type>". The export type comes from the
   * logged-in user's plugin settings and defaults to 'html'. For 'docx' the
   * file is generated on disk first and its path is appended to
   * $file_takeout_tmp_files so the caller can delete it after closing the ZIP.
   *
   * @param array  $entities               Entities to export.
   * @param string $entity_type            Folder name inside the archive and URL segment
   *                                       (file, blog, pages, bookmarks).
   * @param string $subtype                Elgg subtype used for the RSS listing.
   * @param mixed  $guid                   GUID of the container being exported.
   * @param object $zip                    Open archive; addFromString() and addFile() are called on it.
   * @param array  $file_takeout_tmp_files Collects temporary file paths (by reference).
   * @return string <li> items describing what was added, or '' when there is
   *                nothing to export.
   */
  function create_files_from_entities($entities, $entity_type, $subtype, $guid, &$zip, &$file_takeout_tmp_files) {
      if (empty($entities)) {
          return '';
      }

      $user_guid = elgg_get_logged_in_user_entity()->getGUID();
      $setting_name = ($entity_type == 'file')
          ? 'file_takeout_file_meta_export_type'
          : 'file_takeout_export_type';
      $export_type = elgg_get_plugin_user_setting($setting_name, $user_guid, 'file_takeout');
      if ($export_type == '') {
          $export_type = 'html';
      }

      $area = '<li style="font-family: Monaco, Consolas, monospace; font-size:0.85em;">...' . $entity_type . '/entries.xml</li>';

      // RSS index of the container. The view type is switched to 'rss' only for
      // the duration of the listing call and then set back to 'default'.
      $group_entity = get_entity($guid);
      $feed_title = $group_entity ? $group_entity->name : '';
      $url = elgg_get_site_url() . $entity_type . '/group' . '/' . $guid . '/all';

      set_input('view', 'rss');
      $contents = implode("\n", array(
          '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:georss="http://www.georss.org/georss" >',
          '<channel>',
          '<title><![CDATA[' . $feed_title . ']]></title>',
          '<link>' . $url . '</link>',
          '<description><![CDATA[]]></description>',
      ));
      $contents .= elgg_list_entities(array(
          'type' => 'object',
          'subtype' => $subtype,
          'container_guid' => $guid,
          'limit' => '',
      ));
      $contents .= "</channel>\n</rss>";
      set_input('view', 'default');

      $zip->addFromString($entity_type . '/entries.xml', $contents);

      // One exported document per entity.
      foreach ($entities as $entity) {
          $content = file_takeout_build_entry_html($entity, $entity_type, $export_type);

          $filedate = date('Y-m-d', $entity->time_created);
          $file_name = sanitize_file_name($filedate . '-' . $entity->title) . '.' . $export_type;
          $area .= '<li style="font-family: Monaco, Consolas, monospace; font-size: 0.85em;">...' . $entity_type . '/' . $file_name . '</li>';

          if ($export_type == 'docx') {
              // The .docx has to exist on disk until the archive is closed, so it
              // is registered as a temp file instead of being removed here.
              $docx_filepath = elgg_get_data_path() . $file_name;
              generate_docx($content, $docx_filepath, $file_takeout_tmp_files);
              $zip->addFile($docx_filepath, $entity_type . '/' . $file_name);
              $file_takeout_tmp_files[] = $docx_filepath;
          } else {
              $zip->addFromString($entity_type . '/' . $file_name, $content);
          }
      }

      return $area;
  }
  189. //
  190. // Logic to do the work ...
  191. //
  192. // Create the ZIP archive and make it available for download
  // HTML for the page body. Initialised up front because the export branch
  // only ever appends to it.
  $area = '';

  if ($guid_from_path != 'file_takeout') {
      // Export mode: the last URL segment is the GUID of the container (group
      // or user) whose content is zipped.
      // NOTE(review): nothing here verifies that the logged-in user owns this
      // container; only the listing below filters by owner. Confirm whether the
      // page handler enforces that elsewhere.
      $files = get_all_entities($guid_from_path, 'file');
      $blogs = get_all_entities($guid_from_path, 'blog');
      $pages = get_all_entities($guid_from_path, 'page_top');
      $bookmarks = get_all_entities($guid_from_path, 'bookmarks');

      if (!empty($files) || !empty($blogs) || !empty($pages) || !empty($bookmarks)) {
          // The container may not be loadable; show an empty heading then.
          $container = get_entity($guid_from_path);
          $area .= '<h3>' . ($container ? $container->name : '') . '</h3>';
          $area .= '<br><p>Zipping the following files...</p>';
          $area .= '<ul>';

          // Always start from a fresh archive.
          $archive_path = elgg_get_data_path() . $guid_from_path . '.zip';
          if (file_exists($archive_path)) {
              unlink($archive_path);
          }

          $zip = new ZipArchive;
          $res = $zip->open($archive_path, ZipArchive::CREATE);
          if ($res === TRUE) {
              // Raw uploaded files go into file/files/ under their sanitized names.
              if (!empty($files)) {
                  foreach ($files as $file) {
                      $stored_path = $file->getFilenameOnFilestore();
                      if (file_exists($stored_path)) {
                          $zip_name = sanitize_file_name($file->originalfilename);
                          $area .= '<li style="font-family: Monaco, Consolas, monospace; font-size: 0.85em;">...file/files/' . $zip_name . '</li>';
                          $zip->addFile($stored_path, 'file/files/' . $zip_name);
                      } else {
                          $area .= '<li>Could not find ' . $stored_path . '</li>';
                      }
                  }
              }

              // Per-entity documents plus an RSS index for each content type.
              $area .= create_files_from_entities($files, 'file', 'file', $guid_from_path, $zip, $file_takeout_tmp_files);
              if (elgg_is_active_plugin('blog')) {
                  $area .= create_files_from_entities($blogs, 'blog', 'blog', $guid_from_path, $zip, $file_takeout_tmp_files);
              }
              if (elgg_is_active_plugin('pages')) {
                  $area .= create_files_from_entities($pages, 'pages', 'page_top', $guid_from_path, $zip, $file_takeout_tmp_files);
              }
              if (elgg_is_active_plugin('bookmarks')) {
                  $area .= create_files_from_entities($bookmarks, 'bookmarks', 'bookmarks', $guid_from_path, $zip, $file_takeout_tmp_files);
              }

              // The archive is written on close(); temp files must stay on disk
              // until then and are removed afterwards.
              $zip_written = $zip->close();
              foreach ($file_takeout_tmp_files as $tmp_file) {
                  if (file_exists($tmp_file)) {
                      unlink($tmp_file);
                  }
              }

              $area .= '</ul>';
              if ($zip_written) {
                  $area .= '<br><p style="color: green;">ZIP file created successfully.</p><p>Download this <a href="'.$site_url.'file_takeout_download/'.$guid_from_path.'">ZIP file</a> to your computer and extract the contents to any folder.</p>';
              } else {
                  $area .= '<br><p style="color: red;">The ZIP file could not be written.</p>';
              }
          } else {
              // Opening the archive failed: close the list and say so instead of
              // rendering an empty, unterminated list.
              $area .= '</ul>';
              $area .= '<br><p style="color: red;">The ZIP file could not be created.</p>';
          }
      } else {
          $area .= '<br><p style="color: red;">No files to export.</p>';
      }
      $area .= '<br><a href="'.$site_url.'file_takeout">&lt; Back to File Takeout</a>';
  } else {
      // Listing mode: show the groups the user owns (all groups for admins)
      // with content counts and a download link each.
      $area = '<br><p>This tool exports files from a group (which you own) into a ZIP archive. -- <a href="' . $site_url . 'settings/plugins">Configure Settings</a></p>';

      // Download links are built from the site URL rather than the raw request
      // URI, which is user-controlled and breaks with a trailing slash or a
      // query string.
      $download_base = $site_url . 'file_takeout/';

      $all_groups = elgg_get_entities(array("type" => "group", "limit" => ""));
      $my_groups = 0;

      // Bucket the groups by the GUID of their owner.
      $sort_array = array();
      if (is_array($all_groups)) {
          foreach ($all_groups as $group) {
              $sort_array[$group->owner_guid][] = $group;
          }
      }

      $is_admin = $logged_in_user->isAdmin();
      foreach ($sort_array as $key => $val) {
          if ($key == $logged_in_user->guid || $is_admin) {
              $user = get_user($key);
              $area .= '<h3>Group Owner: ' . ($user ? $user->name : '') . '</h3>';
              $area .= '<ul>';
              foreach ($val as $group) {
                  $group_files = get_all_entities($group->guid, 'file');
                  $area .= '<li>&gt; <a href="' . $group->getURL() . '">' . $group->name . '</a> (' . count($group_files) . ' files)';
                  if (elgg_is_active_plugin('blog')) {
                      $group_blogs = get_all_entities($group->guid, 'blog');
                      $area .= '(' . count($group_blogs) . ' blogs)';
                  }
                  if (elgg_is_active_plugin('pages')) {
                      $group_pages = get_all_entities($group->guid, 'page_top');
                      $area .= '(' . count($group_pages) . ' pages)';
                  }
                  if (elgg_is_active_plugin('bookmarks')) {
                      $group_bookmarks = get_all_entities($group->guid, 'bookmarks');
                      $area .= '(' . count($group_bookmarks) . ' bookmarks)';
                  }
                  $area .= ' -- <a href="' . $download_base . $group->guid . '">Download Archive</a></li>';
                  $my_groups++;
              }
              $area .= '</ul><br>';
          }
      }
      if ($my_groups == 0) {
          $area .= '<p><span style="color: red;">You do not own any groups.</span></p><br>';
      }

      // Personal content uses the user's own GUID as the container.
      $user_files = get_all_entities($logged_in_user->guid, 'file');
      $area .= '<p>You may also download all your personal files (' . count($user_files) . ' files)';
      if (elgg_is_active_plugin('blog')) {
          $user_blogs = get_all_entities($logged_in_user->guid, 'blog');
          $area .= '(' . count($user_blogs) . ' blogs)';
      }
      if (elgg_is_active_plugin('pages')) {
          $user_pages = get_all_entities($logged_in_user->guid, 'page_top');
          $area .= '(' . count($user_pages) . ' pages)';
      }
      if (elgg_is_active_plugin('bookmarks')) {
          $user_bookmarks = get_all_entities($logged_in_user->guid, 'bookmarks');
          $area .= '(' . count($user_bookmarks) . ' bookmarks)';
      }
      $area .= ' -- <a href="' . $download_base . $logged_in_user->guid . '">Download Archive</a></p>';
  }

  // Format page
  $body = elgg_view_layout('one_column', array('content' => $title . $area));

  // Draw it
  echo elgg_view_page("File Takeout", $body);
  301. ?>