scanner.php 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. <?php
  2. /**
  3. * Copyright (c) 2012 Robin Appelman <icewind@owncloud.com>
  4. * This file is licensed under the Affero General Public License version 3 or
  5. * later.
  6. * See the COPYING-README file.
  7. */
  8. namespace OC\Files\Cache;
  9. use OC\Files\Filesystem;
  10. use OC\Hooks\BasicEmitter;
  11. /**
  12. * Class Scanner
  13. *
  14. * Hooks available in scope \OC\Files\Cache\Scanner:
  15. * - scanFile(string $path, string $storageId)
  16. * - scanFolder(string $path, string $storageId)
  17. * - postScanFile(string $path, string $storageId)
  18. * - postScanFolder(string $path, string $storageId)
  19. *
  20. * @package OC\Files\Cache
  21. */
  22. class Scanner extends BasicEmitter {
  23. /**
  24. * @var \OC\Files\Storage\Storage $storage
  25. */
  26. private $storage;
  27. /**
  28. * @var string $storageId
  29. */
  30. private $storageId;
  31. /**
  32. * @var \OC\Files\Cache\Cache $cache
  33. */
  34. private $cache;
  35. /**
  36. * @var \OC\Files\Cache\Permissions $permissionsCache
  37. */
  38. private $permissionsCache;
  39. const SCAN_RECURSIVE = true;
  40. const SCAN_SHALLOW = false;
  41. const REUSE_ETAG = 1;
  42. const REUSE_SIZE = 2;
  43. public function __construct(\OC\Files\Storage\Storage $storage) {
  44. $this->storage = $storage;
  45. $this->storageId = $this->storage->getId();
  46. $this->cache = $storage->getCache();
  47. $this->permissionsCache = $storage->getPermissionsCache();
  48. }
  49. /**
  50. * get all the metadata of a file or folder
  51. * *
  52. *
  53. * @param string $path
  54. * @return array with metadata of the file
  55. */
  56. public function getData($path) {
  57. if (!$this->storage->isReadable($path)) {
  58. //cant read, nothing we can do
  59. \OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not readable !!!", \OCP\Util::DEBUG);
  60. return null;
  61. }
  62. $data = array();
  63. $data['mimetype'] = $this->storage->getMimeType($path);
  64. $data['mtime'] = $this->storage->filemtime($path);
  65. if ($data['mimetype'] == 'httpd/unix-directory') {
  66. $data['size'] = -1; //unknown
  67. } else {
  68. $data['size'] = $this->storage->filesize($path);
  69. }
  70. $data['etag'] = $this->storage->getETag($path);
  71. $data['storage_mtime'] = $data['mtime'];
  72. return $data;
  73. }
  74. /**
  75. * scan a single file and store it in the cache
  76. *
  77. * @param string $file
  78. * @param int $reuseExisting
  79. * @param bool $parentExistsInCache
  80. * @return array with metadata of the scanned file
  81. */
  82. public function scanFile($file, $reuseExisting = 0, $parentExistsInCache = false) {
  83. if (!self::isPartialFile($file)
  84. and !Filesystem::isFileBlacklisted($file)
  85. ) {
  86. $this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
  87. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));
  88. $data = $this->getData($file);
  89. if ($data) {
  90. if ($file and !$parentExistsInCache) {
  91. $parent = dirname($file);
  92. if ($parent === '.' or $parent === '/') {
  93. $parent = '';
  94. }
  95. if (!$this->cache->inCache($parent)) {
  96. $this->scanFile($parent);
  97. }
  98. }
  99. $newData = $data;
  100. $cacheData = $this->cache->get($file);
  101. if ($cacheData) {
  102. if (isset($cacheData['fileid'])) {
  103. $this->permissionsCache->remove($cacheData['fileid']);
  104. }
  105. if ($reuseExisting) {
  106. // prevent empty etag
  107. $etag = $cacheData['etag'];
  108. $propagateETagChange = false;
  109. if (empty($etag)) {
  110. $etag = $data['etag'];
  111. $propagateETagChange = true;
  112. }
  113. // only reuse data if the file hasn't explicitly changed
  114. if (isset($data['mtime']) && isset($cacheData['mtime']) && $data['mtime'] === $cacheData['mtime']) {
  115. if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
  116. $data['size'] = $cacheData['size'];
  117. }
  118. if ($reuseExisting & self::REUSE_ETAG) {
  119. $data['etag'] = $etag;
  120. if ($propagateETagChange) {
  121. $parent = $file;
  122. while ($parent !== '') {
  123. $parent = dirname($parent);
  124. if ($parent === '.') {
  125. $parent = '';
  126. }
  127. $parentCacheData = $this->cache->get($parent);
  128. $this->cache->update($parentCacheData['fileid'], array(
  129. 'etag' => $this->storage->getETag($parent),
  130. ));
  131. }
  132. }
  133. }
  134. }
  135. // Only update metadata that has changed
  136. $newData = array_diff_assoc($data, $cacheData);
  137. if (isset($newData['etag'])) {
  138. $cacheDataString = print_r($cacheData, true);
  139. $dataString = print_r($data, true);
  140. \OCP\Util::writeLog('OC\Files\Cache\Scanner',
  141. "!!! No reuse of etag for '$file' !!! \ncache: $cacheDataString \ndata: $dataString",
  142. \OCP\Util::DEBUG);
  143. }
  144. }
  145. }
  146. if (!empty($newData)) {
  147. $this->cache->put($file, $newData);
  148. $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
  149. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
  150. }
  151. } else {
  152. $this->cache->remove($file);
  153. }
  154. return $data;
  155. }
  156. return null;
  157. }
  158. /**
  159. * scan a folder and all it's children
  160. *
  161. * @param string $path
  162. * @param bool $recursive
  163. * @param int $reuse
  164. * @return int the size of the scanned folder or -1 if the size is unknown at this stage
  165. */
  166. public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) {
  167. if ($reuse === -1) {
  168. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : 0;
  169. }
  170. $this->scanFile($path, $reuse);
  171. return $this->scanChildren($path, $recursive, $reuse);
  172. }
  173. /**
  174. * scan all the files and folders in a folder
  175. *
  176. * @param string $path
  177. * @param bool $recursive
  178. * @param int $reuse
  179. * @return int the size of the scanned folder or -1 if the size is unknown at this stage
  180. */
  181. public function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1) {
  182. if ($reuse === -1) {
  183. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : 0;
  184. }
  185. $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
  186. $size = 0;
  187. $childQueue = array();
  188. $existingChildren = array();
  189. if ($this->cache->inCache($path)) {
  190. $children = $this->cache->getFolderContents($path);
  191. foreach ($children as $child) {
  192. $existingChildren[] = $child['name'];
  193. }
  194. }
  195. $newChildren = array();
  196. if ($this->storage->is_dir($path) && ($dh = $this->storage->opendir($path))) {
  197. $exceptionOccurred = false;
  198. \OC_DB::beginTransaction();
  199. if (is_resource($dh)) {
  200. while (($file = readdir($dh)) !== false) {
  201. $child = ($path) ? $path . '/' . $file : $file;
  202. if (!Filesystem::isIgnoredDir($file)) {
  203. $newChildren[] = $file;
  204. try {
  205. $data = $this->scanFile($child, $reuse, true);
  206. if ($data) {
  207. if ($data['size'] === -1) {
  208. if ($recursive === self::SCAN_RECURSIVE) {
  209. $childQueue[] = $child;
  210. } else {
  211. $size = -1;
  212. }
  213. } else if ($size !== -1) {
  214. $size += $data['size'];
  215. }
  216. }
  217. }
  218. catch (\Doctrine\DBAL\DBALException $ex){
  219. // might happen if inserting duplicate while a scanning
  220. // process is running in parallel
  221. // log and ignore
  222. \OC_Log::write('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OC_Log::DEBUG);
  223. $exceptionOccurred = true;
  224. }
  225. }
  226. }
  227. }
  228. $removedChildren = \array_diff($existingChildren, $newChildren);
  229. foreach ($removedChildren as $childName) {
  230. $child = ($path) ? $path . '/' . $childName : $childName;
  231. $this->cache->remove($child);
  232. }
  233. \OC_DB::commit();
  234. if ($exceptionOccurred){
  235. // It might happen that the parallel scan process has already
  236. // inserted mimetypes but those weren't available yet inside the transaction
  237. // To make sure to have the updated mime types in such cases,
  238. // we reload them here
  239. $this->cache->loadMimetypes();
  240. }
  241. foreach ($childQueue as $child) {
  242. $childSize = $this->scanChildren($child, self::SCAN_RECURSIVE, $reuse);
  243. if ($childSize === -1) {
  244. $size = -1;
  245. } else {
  246. $size += $childSize;
  247. }
  248. }
  249. $this->cache->put($path, array('size' => $size));
  250. }
  251. $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
  252. return $size;
  253. }
  254. /**
  255. * @brief check if the file should be ignored when scanning
  256. * NOTE: files with a '.part' extension are ignored as well!
  257. * prevents unfinished put requests to be scanned
  258. * @param String $file
  259. * @return boolean
  260. */
  261. public static function isPartialFile($file) {
  262. if (pathinfo($file, PATHINFO_EXTENSION) === 'part') {
  263. return true;
  264. }
  265. return false;
  266. }
  267. /**
  268. * walk over any folders that are not fully scanned yet and scan them
  269. */
  270. public function backgroundScan() {
  271. $lastPath = null;
  272. while (($path = $this->cache->getIncomplete()) !== false && $path !== $lastPath) {
  273. $this->scan($path);
  274. $this->cache->correctFolderSize($path);
  275. $lastPath = $path;
  276. }
  277. }
  278. }