/*
 * Copyright (C) 2011 Andrea Mazzoleni
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "portable.h"

#include "support.h"
#include "elem.h"
#include "state.h"
#include "parity.h"

/**
 * Context of a scan.
 *
 * It references the state and the disk being scanned, it accumulates
 * the counters of the changes detected, and it holds the lists of the new
 * files, links and dirs found, whose insertion in the disk is delayed.
 */
struct snapraid_scan {
	struct snapraid_state* state; /**< State used. */
	struct snapraid_disk* disk; /**< Disk used. */

	/**
	 * Counters of changes.
	 *
	 * Note that count_equal, count_change and count_insert
	 * are incremented also for links.
	 */
	unsigned count_equal; /**< Files equal. */
	unsigned count_move; /**< Files with a different name, but equal inode, size and timestamp in the same disk. */
	unsigned count_restore; /**< Files with equal name, size and timestamp, but different inode. */
	unsigned count_change; /**< Files with same name, but different size and/or timestamp. */
	unsigned count_copy; /**< Files new, with same name size and timestamp of a file in a different disk. */
	unsigned count_insert; /**< Files new. */
	unsigned count_remove; /**< Files removed. */

	tommy_list file_insert_list; /**< Files to insert. */
	tommy_list link_insert_list; /**< Links to insert. */
	tommy_list dir_insert_list; /**< Dirs to insert. */

	/* nodes for data structures */
	tommy_node node;
};

/**
 * Remove the specified link from the data set.
*/ static void scan_link_remove(struct snapraid_scan* scan, struct snapraid_link* slink) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; /* state changed */ state->need_write = 1; /* remove the file from the link containers */ tommy_hashdyn_remove_existing(&disk->linkset, &slink->nodeset); tommy_list_remove_existing(&disk->linklist, &slink->nodelist); /* deallocate */ link_free(slink); } /** * Insert the specified link in the data set. */ static void scan_link_insert(struct snapraid_scan* scan, struct snapraid_link* slink) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; /* state changed */ state->need_write = 1; /* insert the link in the link containers */ tommy_hashdyn_insert(&disk->linkset, &slink->nodeset, slink, link_name_hash(slink->sub)); tommy_list_insert_tail(&disk->linklist, &slink->nodelist, slink); } /** * Process a symbolic link. */ static void scan_link(struct snapraid_scan* scan, int is_diff, const char* sub, const char* linkto, unsigned link_flag) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; struct snapraid_link* slink; char esc_buffer[ESC_MAX]; /* check if the link already exists */ slink = tommy_hashdyn_search(&disk->linkset, link_name_compare_to_arg, sub, link_name_hash(sub)); if (slink) { /* check if multiple files have the same name */ if (link_flag_has(slink, FILE_IS_PRESENT)) { /* LCOV_EXCL_START */ log_fatal("Internal inconsistency for link '%s%s'\n", disk->dir, sub); os_abort(); /* LCOV_EXCL_STOP */ } /* mark as present */ link_flag_set(slink, FILE_IS_PRESENT); /* check if the link is not changed and it's of the same kind */ if (strcmp(slink->linkto, linkto) == 0 && link_flag == link_flag_get(slink, FILE_IS_LINK_MASK)) { /* it's equal */ ++scan->count_equal; if (state->opt.gui) { log_tag("scan:equal:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer)); } } else { /* it's an update */ /* we have to save the linkto/type */ 
state->need_write = 1; ++scan->count_change; log_tag("scan:update:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer)); if (is_diff) { printf("update %s\n", fmt_term(disk, slink->sub, esc_buffer)); } /* update it */ free(slink->linkto); slink->linkto = strdup_nofail(linkto); link_flag_let(slink, link_flag, FILE_IS_LINK_MASK); } /* nothing more to do */ return; } else { /* create the new link */ ++scan->count_insert; log_tag("scan:add:%s:%s\n", disk->name, esc_tag(sub, esc_buffer)); if (is_diff) { printf("add %s\n", fmt_term(disk, sub, esc_buffer)); } /* and continue to insert it */ } /* insert it */ slink = link_alloc(sub, linkto, link_flag); /* mark it as present */ link_flag_set(slink, FILE_IS_PRESENT); /* insert it in the delayed insert list */ tommy_list_insert_tail(&scan->link_insert_list, &slink->nodelist, slink); } /** * Insert the specified file in the parity. */ static void scan_file_allocate(struct snapraid_scan* scan, struct snapraid_file* file) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; block_off_t i; block_off_t parity_pos; /* state changed */ state->need_write = 1; /* allocate the blocks of the file */ parity_pos = disk->first_free_block; for (i = 0; i < file->blockmax; ++i) { struct snapraid_block* block; struct snapraid_block* over_block; snapraid_info info; /* increment the position until the first really free block */ while (block_has_file(fs_par2block_find(disk, parity_pos))) ++parity_pos; /* get block we are going to overwrite, if any */ over_block = fs_par2block_find(disk, parity_pos); /* deallocate it */ if (over_block != BLOCK_NULL) fs_deallocate(disk, parity_pos); /* get block specific info */ info = info_get(&state->infoarr, parity_pos); /* get the new block we are going to write */ block = fs_file2block_get(file, i); /* if the file block already has an updated hash without rehash */ if (block_has_updated_hash(block) && !info_get_rehash(info)) { /* the only possible case is for REP blocks */ 
assert(block_state_get(block) == BLOCK_STATE_REP); /* convert to a REP block */ block_state_set(block, BLOCK_STATE_REP); /* and keep the hash as it's */ } else { unsigned over_state; /* convert to a CHG block */ block_state_set(block, BLOCK_STATE_CHG); /* state of the block we are going to overwrite */ over_state = block_state_get(over_block); /* if the block is an empty one */ if (over_state == BLOCK_STATE_EMPTY) { /* the block was empty and filled with zeros */ /* set the hash to the special ZERO value */ hash_zero_set(block->hash); } else { /* otherwise it's a DELETED one */ assert(over_state == BLOCK_STATE_DELETED); /* copy the past hash of the block */ memcpy(block->hash, over_block->hash, BLOCK_HASH_SIZE); /* if we have not already cleared the past hash */ if (!state->clear_past_hash) { /* in this case we don't know if the old state is still the one */ /* stored inside the parity, because after an aborted sync, the parity */ /* may be or may be not have been updated with the new data */ /* Then we reset the hash to a bogus value */ /* For example: */ /* - One file is deleted */ /* - Sync aborted after, updating the parity to the new state, */ /* but without saving the content file representing this new state. */ /* - Another file is added again (exactly here) */ /* with the hash of DELETED block not representing the real parity state */ hash_invalid_set(block->hash); } } } /* store in the disk map, after invalidating all the other blocks */ fs_allocate(disk, parity_pos, file, i); /* set the new free position */ disk->first_free_block = parity_pos + 1; } /* insert in the list of contained files */ tommy_list_insert_tail(&disk->filelist, &file->nodelist, file); } /** * Delete the specified file from the parity. * * Note that the parity remains allocated, but the blocks and the file are marked as DELETED. * The file is then inserted in the deleted set, and it should not be deallocated, * as the parity still references it. 
*/ static void scan_file_deallocate(struct snapraid_scan* scan, struct snapraid_file* file) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; block_off_t i; /* remove from the list of contained files */ tommy_list_remove_existing(&disk->filelist, &file->nodelist); /* state changed */ state->need_write = 1; /* here we are supposed to adjust the ::first_free_block position */ /* with the parity position we are deleting */ /* but we also know that we do only delayed insert, after all the deletion, */ /* so at this point ::first_free_block is always at 0, and we don't need to update it */ if (disk->first_free_block != 0) { /* LCOV_EXCL_START */ log_fatal("Internal inconsistency for first free position at '%u' deallocating file '%s'\n", disk->first_free_block, file->sub); os_abort(); /* LCOV_EXCL_STOP */ } /* free all the blocks of the file */ for (i = 0; i < file->blockmax; ++i) { struct snapraid_block* block = fs_file2block_get(file, i); unsigned block_state; /* in case we scan after an aborted sync, */ /* we could get also intermediate states */ block_state = block_state_get(block); switch (block_state) { case BLOCK_STATE_BLK : /* we keep the hash making it an "old" hash, because the parity is still containing data for it */ break; case BLOCK_STATE_CHG : /* if we have not already cleared the past hash */ if (!state->clear_past_hash) { /* in these cases we don't know if the old state is still the one */ /* stored inside the parity, because after an aborted sync, the parity */ /* may be or may be not have been updated with the data that it's now */ /* deleted. Then we reset the hash to a bogus value. */ /* For example: */ /* - One file is added */ /* - Sync aborted after updating the parity to the new state, */ /* but without saving the content file representing this new state. 
*/ /* - File is now deleted after the aborted sync */ /* - Sync again, deleting the blocks (exactly here) */ /* with the hash of CHG block not representing the real parity state */ hash_invalid_set(block->hash); } break; case BLOCK_STATE_REP : /* we just don't know the old hash, and then we set it to invalid */ hash_invalid_set(block->hash); break; default : /* LCOV_EXCL_START */ log_fatal("Internal inconsistency in file '%s' deallocating block '%u:%u' state %u\n", file->sub, i, file->blockmax, block_state); os_abort(); /* LCOV_EXCL_STOP */ } /* set the block as deleted */ block_state_set(block, BLOCK_STATE_DELETED); } /* mark the file as deleted */ file_flag_set(file, FILE_IS_DELETED); /* insert it in the list of deleted blocks */ tommy_list_insert_tail(&disk->deletedlist, &file->nodelist, file); } static void scan_file_delayed_allocate(struct snapraid_scan* scan, struct snapraid_file* file) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; /* if we sort for physical offsets we have to read them for new files */ if (state->opt.force_order == SORT_PHYSICAL && file->physical == FILEPHY_UNREAD_OFFSET ) { char path_next[PATH_MAX]; pathprint(path_next, sizeof(path_next), "%s%s", disk->dir, file->sub); if (filephy(path_next, file->size, &file->physical) != 0) { /* LCOV_EXCL_START */ log_fatal("Error in getting the physical offset of file '%s'. %s.\n", path_next, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } } /* insert in the delayed list */ tommy_list_insert_tail(&scan->file_insert_list, &file->nodelist, file); } /** * Check if a file is completely formed of blocks with invalid parity, * and no rehash is tagged, and if it has at least one block. 
*/ static int file_is_full_invalid_parity_and_stable(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file) { block_off_t i; /* with no block, it never has an invalid parity */ if (file->blockmax == 0) return 0; /* check all blocks */ for (i = 0; i < file->blockmax; ++i) { snapraid_info info; struct snapraid_block* block = fs_file2block_get(file, i); block_off_t parity_pos; /* exclude blocks with parity */ if (!block_has_invalid_parity(block)) return 0; /* * Get the parity position. * * Note that here we expect to always have mapped * parity, because kept files always have it. * * Anyway, checking for POS_NULL doesn't hurt. */ parity_pos = fs_file2par_find(disk, file, i); /* if it's not mapped, it cannot have rehash */ if (parity_pos != POS_NULL) { /* get block specific info */ info = info_get(&state->infoarr, parity_pos); /* if rehash fails */ if (info_get_rehash(info)) return 0; } } return 1; } /** * Check if a file is completely formed of blocks with an updated hash, * and no rehash is tagged, and if it has at least one block. */ static int file_is_full_hashed_and_stable(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file) { block_off_t i; /* with no block, it never has a hash */ if (file->blockmax == 0) return 0; /* check all blocks */ for (i = 0; i < file->blockmax; ++i) { snapraid_info info; struct snapraid_block* block = fs_file2block_get(file, i); block_off_t parity_pos; /* exclude blocks without hash */ if (!block_has_updated_hash(block)) return 0; /* * Get the parity position. * * Note that it's possible to have files * not mapped into the parity, even if they * have a valid hash. * * This happens for example, for 'copied' files * that have REP blocks, but not yet mapped. * * If there are multiple copies, it's also possible * that such files are used as 'source' to copy * hashes, and then to get them inside this function. 
*/ parity_pos = fs_file2par_find(disk, file, i); /* if it's not mapped, it cannot have rehash */ if (parity_pos != POS_NULL) { /* get block specific info */ info = info_get(&state->infoarr, parity_pos); /* exclude blocks needing a rehash */ if (info_get_rehash(info)) return 0; } } return 1; } /** * Refresh the file info. * * This is needed by Windows as the normal way to list directories may report not * updated info. Only the GetFileInformationByHandle() func, called file-by-file, * really ensures to return synced info. * * If this happens, we read also the physical offset, to avoid to read it later. */ static void scan_file_refresh(struct snapraid_scan* scan, const char* sub, struct stat* st, uint64_t* physical) { #if HAVE_LSTAT_SYNC struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; /* if the st_sync is not set, ensure to get synced info */ if (st->st_sync == 0) { char path_next[PATH_MAX]; struct stat synced_st; pathprint(path_next, sizeof(path_next), "%s%s", disk->dir, sub); /* if we sort for physical offsets we have to read them for new files */ if (state->opt.force_order == SORT_PHYSICAL && *physical == FILEPHY_UNREAD_OFFSET ) { /* do nothing, leave the pointer to read the physical offset */ } else { physical = 0; /* set the pointer to 0 to read nothing */ } if (lstat_sync(path_next, &synced_st, physical) != 0) { /* LCOV_EXCL_START */ log_fatal("Error in stat file '%s'. %s.\n", path_next, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } if (st->st_mtime != synced_st.st_mtime || st->st_mtimensec != synced_st.st_mtimensec ) { #ifndef _WIN32 /* * In Windows having different metadata is expected with open files * because the metadata in the directory is updated only when the file * is closed. * * The same happens for hardlinks that duplicate metatada. * The link metadata is updated only when the link is opened. * This extends also to st_size and st_nlink. 
* * See also: * Why is the file size reported incorrectly for files that are still being written to? * http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx */ log_fatal("WARNING! Detected uncached time change from %" PRIu64 ".%09u to %" PRIu64 ".%09u for file '%s'\n", (uint64_t)st->st_mtime, (uint32_t)st->st_mtimensec, (uint64_t)synced_st.st_mtime, (uint32_t)synced_st.st_mtimensec, sub); log_fatal("It's better if you run SnapRAID without other processes running.\n"); #endif st->st_mtime = synced_st.st_mtime; st->st_mtimensec = synced_st.st_mtimensec; } if (st->st_size != synced_st.st_size) { #ifndef _WIN32 log_fatal("WARNING! Detected uncached size change from %" PRIu64 " to %" PRIu64 " for file '%s'\n", (uint64_t)st->st_size, (uint64_t)synced_st.st_size, sub); log_fatal("It's better if you run SnapRAID without other processes running.\n"); #endif st->st_size = synced_st.st_size; } if (st->st_nlink != synced_st.st_nlink) { #ifndef _WIN32 log_fatal("WARNING! Detected uncached nlink change from %u to %u for file '%s'\n", (uint32_t)st->st_nlink, (uint32_t)synced_st.st_nlink, sub); log_fatal("It's better if you run SnapRAID without other processes running.\n"); #endif st->st_nlink = synced_st.st_nlink; } if (st->st_ino != synced_st.st_ino) { log_fatal("DANGER! Detected uncached inode change from %" PRIu64 " to %" PRIu64 " for file '%s'\n", (uint64_t)st->st_ino, (uint64_t)synced_st.st_ino, sub); log_fatal("It's better if you run SnapRAID without other processes running.\n"); /* at this point, it's too late to change inode */ /* and having inconsistent inodes may result to internal failures */ /* so, it's better to abort */ exit(EXIT_FAILURE); } } #else (void)scan; (void)sub; (void)st; (void)physical; #endif } /** * Insert the file in the data set. 
*/ static void scan_file_insert(struct snapraid_scan* scan, struct snapraid_file* file) { struct snapraid_disk* disk = scan->disk; /* insert the file in the containers */ if (!file_flag_has(file, FILE_IS_WITHOUT_INODE)) tommy_hashdyn_insert(&disk->inodeset, &file->nodeset, file, file_inode_hash(file->inode)); tommy_hashdyn_insert(&disk->pathset, &file->pathset, file, file_path_hash(file->sub)); tommy_hashdyn_insert(&disk->stampset, &file->stampset, file, file_stamp_hash(file->size, file->mtime_sec, file->mtime_nsec)); /* delayed allocation of the parity */ scan_file_delayed_allocate(scan, file); } /** * Remove the file from the data set. * * File is then deleted. */ static void scan_file_remove(struct snapraid_scan* scan, struct snapraid_file* file) { struct snapraid_disk* disk = scan->disk; /* remove the file from the containers */ if (!file_flag_has(file, FILE_IS_WITHOUT_INODE)) tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset); tommy_hashdyn_remove_existing(&disk->pathset, &file->pathset); tommy_hashdyn_remove_existing(&disk->stampset, &file->stampset); /* deallocate the file from the parity */ scan_file_deallocate(scan, file); } /** * Keep the file as it's (or with only a name/inode modification). * * If the file is kept, nothing has to be done. * * But if a file contains only blocks with invalid parity, it's reallocated to ensure * to always minimize the space used in the parity. * * This could happen after a failed sync, when some other files are deleted, * and then new ones can be moved backward to fill the hole created. 
*/
static void scan_file_keep(struct snapraid_scan* scan, struct snapraid_file* file)
{
	struct snapraid_disk* disk = scan->disk;

	/* if the file is full invalid, schedule a reinsert at later stage */
	if (file_is_full_invalid_parity_and_stable(scan->state, disk, file)) {
		/* duplicate the file before removing it, because the removed */
		/* one is moved to the deleted list and cannot be reused */
		struct snapraid_file* copy = file_dup(file);

		/* remove the file */
		scan_file_remove(scan, file);

		/* reinsert the copy in the delayed list */
		scan_file_insert(scan, copy);
	}
}

/**
 * Process a file.
 *
 * The file is searched first by inode and then by path:
 * - Found by inode with the same size and time: it's a hardlink if the stored
 *   file was already seen in this scan, otherwise it's a moved file if the path
 *   is different, or an equal file if the path is the same.
 * - Found by inode with different size or time: it's assumed a hardlink if the
 *   stored file was already seen, otherwise the stored file loses its inode,
 *   and the search continues by path.
 * - Found by path with the same size and time: it's a restored file if the past
 *   inodes are used, otherwise it's an equal file.
 * - Found by path with different size or time: the stored file is removed, and
 *   the new one is inserted, reporting it as an update.
 * - Not found: the new file is inserted, reporting it as an add.
 *
 * Before reporting a new or updated file, if the copy detection is enabled,
 * a file with the same stamp is searched in all the disks to reuse its hashes,
 * reporting it as a copy.
 *
 * @param scan Scan context.
 * @param is_diff If different than 0, the change is also printed with printf().
 * @param sub Path of the file, appended to the disk dir to get the full path.
 * @param st Stat info of the file. It may be updated by scan_file_refresh().
 * @param physical Physical offset of the file, stored in the new file allocated.
 */
static void scan_file(struct snapraid_scan* scan, int is_diff, const char* sub, struct stat* st, uint64_t physical)
{
	struct snapraid_state* state = scan->state;
	struct snapraid_disk* disk = scan->disk;
	struct snapraid_file* file;
	tommy_node* i;
	int is_original_file_size_different_than_zero;
	int is_file_already_present;
	data_off_t file_already_present_size;
	int64_t file_already_present_mtime_sec;
	int file_already_present_mtime_nsec;
	int is_file_reported;
	char esc_buffer[ESC_MAX];
	char esc_buffer_alt[ESC_MAX];

	/*
	 * If the disk has persistent inodes and UUID, try a search on the past inodes,
	 * to detect moved files.
	 *
	 * For persistent inodes we mean inodes that keep their values when the file-system
	 * is unmounted and remounted. This doesn't always happen.
	 *
	 * Cases found are:
	 * - Linux FUSE with exFAT driver from https://code.google.com/p/exfat/.
	 * Inodes are reassigned at every mount restarting from 1 and incrementing.
	 * Even worse, the exFAT support in FUSE doesn't use sub-second precision in timestamps
	 * making inode collisions easier (exFAT by design supports 10ms precision).
	 * - Linux VFAT kernel (3.2) driver. Inodes are fully reassigned at every mount.
	 *
	 * In such cases, to avoid possible random collisions, it's better to disable the moved
	 * file recognition.
	 *
	 * For persistent UUID we mean that it has the same UUID as before.
	 * Otherwise, if the UUID is changed, likely it's a new recreated file-system,
	 * and then the inodes have no meaning.
	 *
	 * Note that to disable the search by past inode, we do this implicitly
	 * removing all the past inodes before searching for files.
	 * This ensures that no file is found with a past inode, but at the same time,
	 * it allows to find new files with the same inode, to identify them as hardlinks.
	 */
	int has_past_inodes = !disk->has_volatile_inodes && !disk->has_different_uuid && !disk->has_unsupported_uuid;

	/* always search with the new inode, in all the new inodes found until now, */
	/* with the eventual presence of also the past inodes */
	uint64_t inode = st->st_ino;

	file = tommy_hashdyn_search(&disk->inodeset, file_inode_compare_to_arg, &inode, file_inode_hash(inode));

	/* identify moved files with past inodes and hardlinks with the new inodes */
	if (file) {
		/* check if the file is not changed */
		if (file->size == st->st_size
			&& file->mtime_sec == st->st_mtime
			&& (file->mtime_nsec == STAT_NSEC(st)
			        /* always accept the stored value if it's STAT_NSEC_INVALID */
			        /* it happens when upgrading from an old version of SnapRAID */
			        /* not yet supporting the nanosecond field */
				|| file->mtime_nsec == STAT_NSEC_INVALID
			)
		) {
			/* check if multiple files have the same inode */
			if (file_flag_has(file, FILE_IS_PRESENT)) {
				/* if has_volatile_hardlinks is true, the nlink value is not reliable */
				if (!disk->has_volatile_hardlinks && st->st_nlink == 1) {
					/* LCOV_EXCL_START */
					log_fatal("Internal inode '%" PRIu64 "' inconsistency for file '%s%s' already present\n", (uint64_t)st->st_ino, disk->dir, sub);
					os_abort();
					/* LCOV_EXCL_STOP */
				}

				/* it's a hardlink, pointing to the file already seen */
				scan_link(scan, is_diff, sub, file->sub, FILE_IS_HARDLINK);
				return;
			}

			/* mark as present */
			file_flag_set(file, FILE_IS_PRESENT);

			/* update the nanoseconds mtime only if different */
			/* to avoid unneeded updates */
			if (file->mtime_nsec == STAT_NSEC_INVALID
				&& STAT_NSEC(st) != file->mtime_nsec
			) {
				file->mtime_nsec = STAT_NSEC(st);

				/* we have to save the new mtime */
				state->need_write = 1;
			}

			if (strcmp(file->sub, sub) != 0) {
				/* if the path is different, it means a moved file with the same inode */
				++scan->count_move;

				log_tag("scan:move:%s:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer), esc_tag(sub, esc_buffer_alt));
				if (is_diff) {
					printf("move %s -> %s\n", fmt_term(disk, file->sub, esc_buffer), fmt_term(disk, sub, esc_buffer_alt));
				}

				/* remove from the name set */
				tommy_hashdyn_remove_existing(&disk->pathset, &file->pathset);

				/* save the new name */
				file_rename(file, sub);

				/* reinsert in the name set */
				tommy_hashdyn_insert(&disk->pathset, &file->pathset, file, file_path_hash(file->sub));

				/* we have to save the new name */
				state->need_write = 1;
			} else {
				/* otherwise it's equal */
				++scan->count_equal;

				if (state->opt.gui) {
					log_tag("scan:equal:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
				}
			}

			/* mark the file as kept */
			scan_file_keep(scan, file);

			/* nothing more to do */
			return;
		}

		/*
		 * Here the file matches the inode, but not the other info
		 *
		 * It could be a modified file with the same name,
		 * or a restored/copied file that gets assigned a previously used inode,
		 * or a file-system with non-persistent inodes.
		 *
		 * In NTFS it could be also a hardlink, because in NTFS
		 * hardlinks don't share the same directory information,
		 * like attribute and time.
		 *
		 * For example:
		 * C:> echo A > A
		 * C:> mklink /H B A
		 * ...wait one minute
		 * C:> echo AAAAAAAAAAAAAA > A
		 * C:> dir
		 * ...both time and size of A and B don't match!
		 */
		if (file_flag_has(file, FILE_IS_PRESENT)) {
			/* if has_volatile_hardlinks is true, the nlink value is not reliable */
			if (!disk->has_volatile_hardlinks && st->st_nlink == 1) {
				/* LCOV_EXCL_START */
				log_fatal("Internal inode '%" PRIu64 "' inconsistency for files '%s%s' and '%s%s' with same inode but different attributes: size %" PRIu64 "?%" PRIu64 ", sec %" PRIu64 "?%" PRIu64 ", nsec %d?%d\n",
					file->inode, disk->dir, sub, disk->dir, file->sub,
					file->size, (uint64_t)st->st_size,
					file->mtime_sec, (uint64_t)st->st_mtime,
					file->mtime_nsec, STAT_NSEC(st));
				os_abort();
				/* LCOV_EXCL_STOP */
			}

			/* LCOV_EXCL_START */
			/* suppose it's a hardlink with not synced metadata */
			scan_link(scan, is_diff, sub, file->sub, FILE_IS_HARDLINK);
			return;
			/* LCOV_EXCL_STOP */
		}

		/* assume a previously used inode, it's the worst case */
		/* and we handle it removing the duplicate stored inode. */
		/* If the file is found by name later, it will have the inode restored, */
		/* otherwise, it will get removed */

		/* remove from the inode set */
		tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset);

		/* clear the inode */
		/* this is not really needed for correct functionality */
		/* because we are going to set FILE_IS_WITHOUT_INODE */
		/* but it's easier for debugging to have invalid inodes set to 0 */
		file->inode = 0;

		/* mark as missing inode */
		file_flag_set(file, FILE_IS_WITHOUT_INODE);

		/* go further to find it by name */
	}

	/* initialize for later overwrite */
	is_file_reported = 0;
	is_original_file_size_different_than_zero = 0;

	/* then try finding it by name */
	file = tommy_hashdyn_search(&disk->pathset, file_path_compare_to_arg, sub, file_path_hash(sub));

	/* keep track if the file already exists */
	is_file_already_present = file != 0;

	if (is_file_already_present) {
		/* if the file is without an inode */
		if (file_flag_has(file, FILE_IS_WITHOUT_INODE)) {
			/* set it now */
			file->inode = st->st_ino;

			/* insert in the set */
			tommy_hashdyn_insert(&disk->inodeset, &file->nodeset, file, file_inode_hash(file->inode));

			/* unmark as missing inode */
			file_flag_clear(file, FILE_IS_WITHOUT_INODE);
		} else {
			/* here the inode has to be different, otherwise we would have found it before */
			if (file->inode == st->st_ino) {
				/* LCOV_EXCL_START */
				log_fatal("Internal inconsistency in inode '%" PRIu64 "' for files '%s%s' as unexpected matching\n", file->inode, disk->dir, sub);
				os_abort();
				/* LCOV_EXCL_STOP */
			}
		}

		/* for sure it cannot be already present */
		if (file_flag_has(file, FILE_IS_PRESENT)) {
			/* LCOV_EXCL_START */
			log_fatal("Internal inconsistency in path for file '%s%s' matching and already present\n", disk->dir, sub);
			os_abort();
			/* LCOV_EXCL_STOP */
		}

		/* check if the file is not changed */
		if (file->size == st->st_size
			&& file->mtime_sec == st->st_mtime
			&& (file->mtime_nsec == STAT_NSEC(st)
			        /* always accept the stored value if it's STAT_NSEC_INVALID */
			        /* it happens when upgrading from an old version of SnapRAID */
			        /* not yet supporting the nanosecond field */
				|| file->mtime_nsec == STAT_NSEC_INVALID
			)
		) {
			/* mark as present */
			file_flag_set(file, FILE_IS_PRESENT);

			/* update the nanoseconds mtime only if different */
			/* to avoid unneeded updates */
			if (file->mtime_nsec == STAT_NSEC_INVALID
				&& STAT_NSEC(st) != STAT_NSEC_INVALID
			) {
				file->mtime_nsec = STAT_NSEC(st);

				/* we have to save the new mtime */
				state->need_write = 1;
			}

			/* if when processing the disk we used the past inodes values */
			if (has_past_inodes) {
				/* if persistent inodes are supported, we are sure that the inode number */
				/* is now different, because otherwise the file would have been found */
				/* when searching by inode. */
				/* if the inode is different, it means a rewritten file with the same path */
				/* like when restoring a backup that restores also the timestamp */
				++scan->count_restore;

				log_tag("scan:restore:%s:%s\n", disk->name, esc_tag(sub, esc_buffer));
				if (is_diff) {
					printf("restore %s\n", fmt_term(disk, sub, esc_buffer));
				}

				/* remove from the inode set */
				tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset);

				/* save the new inode */
				file->inode = st->st_ino;

				/* reinsert in the inode set */
				tommy_hashdyn_insert(&disk->inodeset, &file->nodeset, file, file_inode_hash(file->inode));

				/* we have to save the new inode */
				state->need_write = 1;
			} else {
				/* otherwise it's the case of non-persistent inodes, where it doesn't */
				/* matter if the inode is different or equal, because they have no */
				/* meaning, and then we don't even save them */
				++scan->count_equal;

				if (state->opt.gui) {
					log_tag("scan:equal:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
				}
			}

			/* mark the file as kept */
			scan_file_keep(scan, file);

			/* nothing more to do */
			return;
		}

		/* here if the file is changed but with the correct name */

		/* save the info for later printout */
		file_already_present_size = file->size;
		file_already_present_mtime_sec = file->mtime_sec;
		file_already_present_mtime_nsec = file->mtime_nsec;

		/* keep track if the original file was not of zero size */
		is_original_file_size_different_than_zero = file->size != 0;

		/* remove it, and continue to insert it again */
		scan_file_remove(scan, file);

		/* and continue to insert it again */
	} else {
		/* no previous file, nothing to print later */
		file_already_present_size = 0;
		file_already_present_mtime_sec = 0;
		file_already_present_mtime_nsec = 0;
	}

	/* refresh the info, to ensure that they are synced, */
	/* note that we refresh only the info of the new or modified files */
	/* because this is a slow operation */
	scan_file_refresh(scan, sub, st, &physical);

#ifndef _WIN32
	/* do a safety check to ensure that the common ext4 case of zeroing */
	/* the size of a file after a crash doesn't propagate to the backup */
	/* this check is specific for Linux, so we disable it on Windows */
	if (is_original_file_size_different_than_zero && st->st_size == 0) {
		if (!state->opt.force_zero) {
			/* LCOV_EXCL_START */
			log_fatal("The file '%s%s' has unexpected zero size!\n", disk->dir, sub);
			log_fatal("It's possible that after a kernel crash this file was lost,\n");
			log_fatal("and you can use 'snapraid fix -f /%s' to recover it.\n", fmt_poll(disk, sub, esc_buffer));
			/* when only listing the differences, the condition is reported without stopping */
			if (!is_diff) {
				log_fatal("If this an expected condition you can '%s' anyway using 'snapraid --force-zero %s'\n", state->command, state->command);
				exit(EXIT_FAILURE);
			}
			/* LCOV_EXCL_STOP */
		}
	}
#else
	/* avoid the unused warning in Windows */
	(void)is_original_file_size_different_than_zero;
#endif

	/* insert it */
	file = file_alloc(state->block_size, sub, st->st_size, st->st_mtime, STAT_NSEC(st), st->st_ino, physical);

	/* mark it as present */
	file_flag_set(file, FILE_IS_PRESENT);

	/* if copy detection is enabled */
	/* note that the copy detection is tried also for updated files */
	/* this makes sense because it may happen to have two different copies */
	/* of the same file, and we move the right one over the wrong one */
	/* in such case we have a "copy" over an "update" */
	if (!state->opt.force_nocopy) {
		tommy_uint32_t hash = file_stamp_hash(file->size, file->mtime_sec, file->mtime_nsec);

		/* search for a file with the same name and stamp in all the disks */
		for (i = state->disklist; i != 0; i = i->next) {
			struct snapraid_disk* other_disk = i->data;
			struct snapraid_file* other_file;

			/* if the nanosecond part of the time stamp is valid, search */
			/* for name and stamp, otherwise for path and stamp */
			if (file->mtime_nsec != 0 && file->mtime_nsec != STAT_NSEC_INVALID)
				other_file = tommy_hashdyn_search(&other_disk->stampset, file_namestamp_compare, file, hash);
			else
				other_file = tommy_hashdyn_search(&other_disk->stampset, file_pathstamp_compare, file, hash);

			/* if found, and it's a fully hashed file */
			if (other_file && file_is_full_hashed_and_stable(scan->state, other_disk, other_file)) {
				/* assume that the file is a copy, and reuse the hash */
				file_copy(other_file, file);

				/* count it as a copy, instead of as an update or an add */
				++scan->count_copy;

				log_tag("scan:copy:%s:%s:%s:%s\n", other_disk->name, esc_tag(other_file->sub, esc_buffer), disk->name, esc_tag(file->sub, esc_buffer_alt));
				if (is_diff) {
					printf("copy %s -> %s\n", fmt_term(other_disk, other_file->sub, esc_buffer), fmt_term(disk, file->sub, esc_buffer_alt));
				}

				/* mark it as reported */
				is_file_reported = 1;

				/* no need to continue the search */
				break;
			}
		}
	}

	/* if not yet reported, do it now */
	/* we postpone this to avoid printing the copied files twice */
	if (!is_file_reported) {
		if (is_file_already_present) {
			++scan->count_change;

			log_tag("scan:update:%s:%s: %" PRIu64 " %" PRIu64 ".%d -> %" PRIu64 " %" PRIu64 ".%d\n", disk->name, esc_tag(sub, esc_buffer),
				file_already_present_size, file_already_present_mtime_sec, file_already_present_mtime_nsec,
				file->size, file->mtime_sec, file->mtime_nsec
			);
			if (is_diff) {
				printf("update %s\n", fmt_term(disk, sub, esc_buffer));
			}
		} else {
			++scan->count_insert;

			log_tag("scan:add:%s:%s\n", disk->name, esc_tag(sub, esc_buffer));
			if (is_diff) {
				printf("add %s\n", fmt_term(disk, sub, esc_buffer));
			}
		}
	}

	/* insert the file in the delayed list */
	scan_file_insert(scan, file);
}

/**
 * Remove the specified dir from the data set.
 *
 * The dir is removed from the dir set and from the dir list
 * of the disk, and then it's deallocated.
 */
static void scan_emptydir_remove(struct snapraid_scan* scan, struct snapraid_dir* dir)
{
	struct snapraid_state* state = scan->state;
	struct snapraid_disk* disk = scan->disk;

	/* state changed */
	state->need_write = 1;

	/* remove the dir from the dir containers */
	tommy_hashdyn_remove_existing(&disk->dirset, &dir->nodeset);
	tommy_list_remove_existing(&disk->dirlist, &dir->nodelist);

	/* deallocate */
	dir_free(dir);
}

/**
 * Insert the specified dir in the data set.
*/ static void scan_emptydir_insert(struct snapraid_scan* scan, struct snapraid_dir* dir) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; /* state changed */ state->need_write = 1; /* insert the dir in the dir containers */ tommy_hashdyn_insert(&disk->dirset, &dir->nodeset, dir, dir_name_hash(dir->sub)); tommy_list_insert_tail(&disk->dirlist, &dir->nodelist, dir); } /** * Process a dir. */ static void scan_emptydir(struct snapraid_scan* scan, const char* sub) { struct snapraid_disk* disk = scan->disk; struct snapraid_dir* dir; /* check if the dir already exists */ dir = tommy_hashdyn_search(&disk->dirset, dir_name_compare, sub, dir_name_hash(sub)); if (dir) { /* check if multiple files have the same name */ if (dir_flag_has(dir, FILE_IS_PRESENT)) { /* LCOV_EXCL_START */ log_fatal("Internal inconsistency for dir '%s%s'\n", disk->dir, sub); os_abort(); /* LCOV_EXCL_STOP */ } /* mark as present */ dir_flag_set(dir, FILE_IS_PRESENT); /* nothing more to do */ return; } else { /* and continue to insert it */ } /* insert it */ dir = dir_alloc(sub); /* mark it as present */ dir_flag_set(dir, FILE_IS_PRESENT); /* insert it in the delayed insert list */ tommy_list_insert_tail(&scan->dir_insert_list, &dir->nodelist, dir); } struct dirent_sorted { /* node for data structures */ tommy_node node; #if HAVE_STRUCT_DIRENT_D_INO uint64_t d_ino; /**< Inode number. */ #endif #if HAVE_STRUCT_DIRENT_D_TYPE uint32_t d_type; /**< File type. */ #endif #if HAVE_STRUCT_DIRENT_D_STAT struct stat d_stat; /**< Stat result. */ #endif char d_name[]; /**< Variable length name. It must be the last field. 
*/ }; #if HAVE_STRUCT_DIRENT_D_INO static int dd_ino_compare(const void* void_a, const void* void_b) { const struct dirent_sorted* a = void_a; const struct dirent_sorted* b = void_b; if (a->d_ino < b->d_ino) return -1; if (a->d_ino > b->d_ino) return 1; return 0; } #endif static int dd_name_compare(const void* void_a, const void* void_b) { const struct dirent_sorted* a = void_a; const struct dirent_sorted* b = void_b; return strcmp(a->d_name, b->d_name); } /** * Return the stat info of a dir entry. */ #if HAVE_STRUCT_DIRENT_D_STAT #define DSTAT(file, dd, buf) dstat(dd) struct stat* dstat(struct dirent_sorted* dd) { return &dd->d_stat; } #else #define DSTAT(file, dd, buf) dstat(file, buf) struct stat* dstat(const char* file, struct stat* st) { if (lstat(file, st) != 0) { /* LCOV_EXCL_START */ log_fatal("Error in stat file/directory '%s'. %s.\n", file, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } return st; } #endif /** * Process a directory. * Return != 0 if at least one file or link is processed. */ static int scan_dir(struct snapraid_scan* scan, int level, int is_diff, const char* dir, const char* sub) { struct snapraid_state* state = scan->state; struct snapraid_disk* disk = scan->disk; int processed = 0; DIR* d; tommy_list list; tommy_node* node; tommy_list_init(&list); d = opendir(dir); if (!d) { /* LCOV_EXCL_START */ log_fatal("Error opening directory '%s'. 
%s.\n", dir, strerror(errno)); if (level == 0) log_fatal("If this is the disk mount point, remember to create it manually\n"); else log_fatal("If it's a permission problem, you can exclude it in the config file with:\n\texclude /%s\n", sub); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } /* read the full directory */ while (1) { char path_next[PATH_MAX]; char sub_next[PATH_MAX]; struct dirent_sorted* entry; const char* name; struct dirent* dd; size_t name_len; /* * Clear errno to differentiate the end of the stream and an error condition * * From the Linux readdir() manpage: * "If the end of the directory stream is reached, NULL is returned and errno is not changed. * If an error occurs, NULL is returned and errno is set appropriately." */ errno = 0; dd = readdir(d); if (dd == 0 && errno != 0) { /* LCOV_EXCL_START */ log_fatal("Error reading directory '%s'. %s.\n", dir, strerror(errno)); log_fatal("You can exclude it in the config file with:\n\texclude /%s\n", sub); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } if (dd == 0) { break; /* finished */ } /* skip "." and ".." files */ name = dd->d_name; if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' 
&& name[2] == 0))) continue; pathprint(path_next, sizeof(path_next), "%s%s", dir, name); pathprint(sub_next, sizeof(sub_next), "%s%s", sub, name); /* check for not supported file names */ if (name[0] == 0) { /* LCOV_EXCL_START */ log_fatal("Unsupported name '%s' in file '%s'.\n", name, path_next); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } /* exclude hidden files even before calling lstat() */ if (filter_hidden(state->filter_hidden, dd) != 0) { msg_verbose("Excluding hidden '%s'\n", path_next); continue; } /* exclude content files even before calling lstat() */ if (filter_content(&state->contentlist, path_next) != 0) { msg_verbose("Excluding content '%s'\n", path_next); continue; } name_len = strlen(dd->d_name); entry = malloc_nofail(sizeof(struct dirent_sorted) + name_len + 1); /* copy the dir entry */ #if HAVE_STRUCT_DIRENT_D_INO entry->d_ino = dd->d_ino; #endif #if HAVE_STRUCT_DIRENT_D_TYPE entry->d_type = dd->d_type; #endif #if HAVE_STRUCT_DIRENT_D_STAT /* convert dirent to lstat result */ dirent_lstat(dd, &entry->d_stat); /* note that at this point the st_mode may be 0 */ #endif memcpy(entry->d_name, dd->d_name, name_len + 1); /* insert in the list */ tommy_list_insert_tail(&list, &entry->node, entry); } if (closedir(d) != 0) { /* LCOV_EXCL_START */ log_fatal("Error closing directory '%s'. 
%s.\n", dir, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } if (state->opt.force_order == SORT_ALPHA) { /* if requested sort alphabetically */ /* this is mainly done for testing to ensure to always */ /* process in the same way in different platforms */ tommy_list_sort(&list, dd_name_compare); } #if HAVE_STRUCT_DIRENT_D_INO else if (!disk->has_volatile_inodes) { /* if inodes are persistent */ /* sort the list of dir entries by inodes */ tommy_list_sort(&list, dd_ino_compare); } /* otherwise just keep the insertion order */ #endif /* process the sorted dir entries */ node = list; while (node != 0) { char path_next[PATH_MAX]; char sub_next[PATH_MAX]; char out[PATH_MAX]; struct snapraid_filter* reason = 0; struct dirent_sorted* dd = node->data; const char* name = dd->d_name; struct stat* st; int type; #if !HAVE_STRUCT_DIRENT_D_STAT struct stat st_buf; #endif pathprint(path_next, sizeof(path_next), "%s%s", dir, name); pathprint(sub_next, sizeof(sub_next), "%s%s", sub, name); /* start with an unknown type */ type = -1; st = 0; /* if dirent has the type, use it */ #if HAVE_STRUCT_DIRENT_D_TYPE switch (dd->d_type) { case DT_UNKNOWN : break; case DT_REG : type = 0; break; case DT_LNK : type = 1; break; case DT_DIR : type = 2; break; default : type = 3; break; } #endif /* if type is still unknown */ if (type < 0) { /* get the type from stat */ st = DSTAT(path_next, dd, &st_buf); #if HAVE_STRUCT_DIRENT_D_STAT /* if the st_mode field is missing, takes care to fill it using normal lstat() */ /* at now this can happen only in Windows (with HAVE_STRUCT_DIRENT_D_STAT defined), */ /* because we use a directory reading method that doesn't read info about ReparsePoint. */ /* Note that here we cannot call here lstat_sync(), because we don't know what kind */ /* of file is it, and lstat_sync() doesn't always work */ if (st->st_mode == 0) { if (lstat(path_next, st) != 0) { /* LCOV_EXCL_START */ log_fatal("Error in stat file/directory '%s'. 
%s.\n", path_next, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } } #endif if (S_ISREG(st->st_mode)) type = 0; else if (S_ISLNK(st->st_mode)) type = 1; else if (S_ISDIR(st->st_mode)) type = 2; else type = 3; } if (type == 0) { /* REG */ if (filter_path(&state->filterlist, &reason, disk->name, sub_next) == 0) { /* late stat, if not yet called */ if (!st) st = DSTAT(path_next, dd, &st_buf); #if HAVE_LSTAT_SYNC /* if the st_ino field is missing, takes care to fill it using the extended lstat() */ /* this can happen only in Windows */ if (st->st_ino == 0 || st->st_nlink == 0) { if (lstat_sync(path_next, st, 0) != 0) { /* LCOV_EXCL_START */ log_fatal("Error in stat file '%s'. %s.\n", path_next, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } } #endif scan_file(scan, is_diff, sub_next, st, FILEPHY_UNREAD_OFFSET); processed = 1; } else { msg_verbose("Excluding file '%s' for rule '%s'\n", path_next, filter_type(reason, out, sizeof(out))); } } else if (type == 1) { /* LNK */ if (filter_path(&state->filterlist, &reason, disk->name, sub_next) == 0) { char subnew[PATH_MAX]; int ret; ret = readlink(path_next, subnew, sizeof(subnew)); if (ret >= PATH_MAX) { /* LCOV_EXCL_START */ log_fatal("Error in readlink file '%s'. Symlink too long.\n", path_next); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } if (ret < 0) { /* LCOV_EXCL_START */ log_fatal("Error in readlink file '%s'. %s.\n", path_next, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } if (ret == 0) log_fatal("WARNING! 
Empty symbolic link '%s'.\n", path_next); /* readlink doesn't put the final 0 */ subnew[ret] = 0; /* process as a symbolic link */ scan_link(scan, is_diff, sub_next, subnew, FILE_IS_SYMLINK); processed = 1; } else { msg_verbose("Excluding link '%s' for rule '%s'\n", path_next, filter_type(reason, out, sizeof(out))); } } else if (type == 2) { /* DIR */ if (filter_subdir(&state->filterlist, &reason, disk->name, sub_next) == 0) { #ifndef _WIN32 /* late stat, if not yet called */ if (!st) st = DSTAT(path_next, dd, &st_buf); /* in Unix don't follow mount points in different devices */ /* in Windows we are already skipping them reporting them as special files */ if ((uint64_t)st->st_dev != disk->device) { log_fatal("WARNING! Ignoring mount point '%s' because it appears to be in a different device\n", path_next); } else #endif { char sub_dir[PATH_MAX]; /* recurse */ pathslash(path_next, sizeof(path_next)); pathcpy(sub_dir, sizeof(sub_dir), sub_next); pathslash(sub_dir, sizeof(sub_dir)); if (scan_dir(scan, level + 1, is_diff, path_next, sub_dir) == 0) { /* scan the directory as empty dir */ scan_emptydir(scan, sub_next); } /* or we processed something internally, or we have added the empty dir */ processed = 1; } } else { msg_verbose("Excluding directory '%s' for rule '%s'\n", path_next, filter_type(reason, out, sizeof(out))); } } else { if (filter_path(&state->filterlist, &reason, disk->name, sub_next) == 0) { /* late stat, if not yet called */ if (!st) st = DSTAT(path_next, dd, &st_buf); log_fatal("WARNING! 
Ignoring special '%s' file '%s'\n", stat_desc(st), path_next); } else { msg_verbose("Excluding special file '%s' for rule '%s'\n", path_next, filter_type(reason, out, sizeof(out))); } } /* next entry */ node = node->next; /* free the present one */ free(dd); } return processed; } static int state_diffscan(struct snapraid_state* state, int is_diff) { tommy_node* i; tommy_node* j; tommy_list scanlist; int done; fptr* msg; struct snapraid_scan total; int no_difference; char esc_buffer[ESC_MAX]; tommy_list_init(&scanlist); if (is_diff) msg_progress("Comparing...\n"); /* first scan all the directory and find new and deleted files */ for (i = state->disklist; i != 0; i = i->next) { struct snapraid_disk* disk = i->data; struct snapraid_scan* scan; tommy_node* node; int ret; int has_persistent_inodes; int has_syncronized_hardlinks; scan = malloc_nofail(sizeof(struct snapraid_scan)); scan->state = state; scan->disk = disk; scan->count_equal = 0; scan->count_move = 0; scan->count_copy = 0; scan->count_restore = 0; scan->count_change = 0; scan->count_remove = 0; scan->count_insert = 0; tommy_list_init(&scan->file_insert_list); tommy_list_init(&scan->link_insert_list); tommy_list_init(&scan->dir_insert_list); tommy_list_insert_tail(&scanlist, &scan->node, scan); if (!is_diff) msg_progress("Scanning disk %s...\n", disk->name); /* check if the disk supports persistent inodes */ ret = fsinfo(disk->dir, &has_persistent_inodes, &has_syncronized_hardlinks, 0, 0); if (ret < 0) { /* LCOV_EXCL_START */ log_fatal("Error accessing disk '%s' to get file-system info. 
%s.\n", disk->dir, strerror(errno)); exit(EXIT_FAILURE); /* LCOV_EXCL_STOP */ } if (!has_persistent_inodes) { disk->has_volatile_inodes = 1; } if (!has_syncronized_hardlinks) { disk->has_volatile_hardlinks = 1; } /* if inodes or UUID are not persistent/changed/unsupported */ if (disk->has_volatile_inodes || disk->has_different_uuid || disk->has_unsupported_uuid) { /* remove all the inodes from the inode collection */ /* if they are not persistent, all of them could be changed now */ /* and we don't want to find false matching ones */ /* see scan_file() for more details */ node = disk->filelist; while (node) { struct snapraid_file* file = node->data; node = node->next; /* remove from the inode set */ tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset); /* clear the inode */ file->inode = 0; /* mark as missing inode */ file_flag_set(file, FILE_IS_WITHOUT_INODE); } } scan_dir(scan, 0, is_diff, disk->dir, ""); } /* we split the search in two phases because to detect files */ /* moved from one disk to another we have to start deletion */ /* only when all disks have all the new files found */ /* now process all the new and deleted files */ for (i = scanlist; i != 0; i = i->next) { struct snapraid_scan* scan = i->data; struct snapraid_disk* disk = scan->disk; tommy_node* node; unsigned phy_count; unsigned phy_dup; uint64_t phy_last; struct snapraid_file* phy_file_last; /* check for removed files */ node = disk->filelist; while (node) { struct snapraid_file* file = node->data; /* next node */ node = node->next; /* remove if not present */ if (!file_flag_has(file, FILE_IS_PRESENT)) { ++scan->count_remove; log_tag("scan:remove:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer)); if (is_diff) { printf("remove %s\n", fmt_term(disk, file->sub, esc_buffer)); } scan_file_remove(scan, file); } } /* check for removed links */ node = disk->linklist; while (node) { struct snapraid_link* slink = node->data; /* next node */ node = node->next; /* remove if not present */ if 
(!link_flag_has(slink, FILE_IS_PRESENT)) { ++scan->count_remove; log_tag("scan:remove:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer)); if (is_diff) { printf("remove %s\n", fmt_term(disk, slink->sub, esc_buffer)); } scan_link_remove(scan, slink); } } /* check for removed dirs */ node = disk->dirlist; while (node) { struct snapraid_dir* dir = node->data; /* next node */ node = node->next; /* remove if not present */ if (!dir_flag_has(dir, FILE_IS_PRESENT)) { scan_emptydir_remove(scan, dir); } } /* sort the files before inserting them */ /* we use a stable sort to ensure that if the reported physical offset/inode */ /* are always 0, we keep at least the directory order */ switch (state->opt.force_order) { case SORT_PHYSICAL : tommy_list_sort(&scan->file_insert_list, file_physical_compare); break; case SORT_INODE : tommy_list_sort(&scan->file_insert_list, file_inode_compare); break; case SORT_ALPHA : tommy_list_sort(&scan->file_insert_list, file_path_compare); break; case SORT_DIR : /* already in order */ break; } /* insert all the new files, we insert them only after the deletion */ /* to reuse the just freed space */ /* also check if the physical offset reported are fakes or not */ node = scan->file_insert_list; phy_count = 0; phy_dup = 0; phy_last = FILEPHY_UNREAD_OFFSET; phy_file_last = 0; while (node) { struct snapraid_file* file = node->data; /* if the file is not empty, count duplicate physical offsets */ if (state->opt.force_order == SORT_PHYSICAL && file->size != 0) { if (phy_file_last != 0 && file->physical == phy_last /* files without offset are expected to have duplicates */ && phy_last != FILEPHY_WITHOUT_OFFSET ) { /* if verbose, print the list of duplicates real offsets */ /* other cases are for offsets not supported, so we don't need to report them file by file */ if (phy_last >= FILEPHY_REAL_OFFSET) { log_fatal("WARNING! 
Files '%s%s' and '%s%s' have the same physical offset %" PRId64 ".\n", disk->dir, phy_file_last->sub, disk->dir, file->sub, phy_last); } ++phy_dup; } phy_file_last = file; phy_last = file->physical; ++phy_count; } /* next node */ node = node->next; /* insert in the parity */ scan_file_allocate(scan, file); } /* mark the disk without reliable physical offset if it has duplicates */ /* here it should never happen because we already sorted out hardlinks */ if (state->opt.force_order == SORT_PHYSICAL && phy_dup > 0) { disk->has_unreliable_physical = 1; } /* insert all the new links */ node = scan->link_insert_list; while (node) { struct snapraid_link* slink = node->data; /* next node */ node = node->next; /* insert it */ scan_link_insert(scan, slink); } /* insert all the new dirs */ node = scan->dir_insert_list; while (node) { struct snapraid_dir* dir = node->data; /* next node */ node = node->next; /* insert it */ scan_emptydir_insert(scan, dir); } } /* check for disks where all the previously existing files where removed */ if (!state->opt.force_empty) { int all_missing = 0; int all_rewritten = 0; done = 0; for (i = state->disklist, j = scanlist; i != 0; i = i->next, j = j->next) { struct snapraid_disk* disk = i->data; struct snapraid_scan* scan = j->data; if (scan->count_equal == 0 && scan->count_move == 0 && scan->count_restore == 0 && (scan->count_remove != 0 || scan->count_change != 0) ) { if (!done) { done = 1; log_fatal("WARNING! 
All the files previously present in disk '%s' at dir '%s'", disk->name, disk->dir); } else { log_fatal(", disk '%s' at dir '%s'", disk->name, disk->dir); } /* detect the special condition of all files missing */ if (scan->count_change == 0) all_missing = 1; /* detect the special condition of all files rewritten */ if (scan->count_remove == 0) all_rewritten = 1; } } if (done) { log_fatal("\nare now missing or rewritten!\n"); if (all_rewritten) { log_fatal("This could happen when restoring a disk with a backup\n"); log_fatal("program that is not setting correctly the timestamps.\n"); } if (all_missing) { log_fatal("This could happen when some disks are not mounted\n"); log_fatal("in the expected directory.\n"); } if (!is_diff) { log_fatal("If you want to '%s' anyway, use 'snapraid --force-empty %s'.\n", state->command, state->command); exit(EXIT_FAILURE); } } } /* check for disks without the physical offset support */ if (state->opt.force_order == SORT_PHYSICAL) { done = 0; for (i = state->disklist; i != 0; i = i->next) { struct snapraid_disk* disk = i->data; if (disk->has_unreliable_physical) { if (!done) { done = 1; log_fatal("WARNING! Physical offsets not supported for disk '%s'", disk->name); } else { log_fatal(", '%s'", disk->name); } } } if (done) { log_fatal(". Files order won't be optimal.\n"); } } /* check for disks without persistent inodes */ done = 0; for (i = state->disklist; i != 0; i = i->next) { struct snapraid_disk* disk = i->data; if (disk->has_volatile_inodes) { if (!done) { done = 1; log_fatal("WARNING! Inodes are not persistent for disks: '%s'", disk->name); } else { log_fatal(", '%s'", disk->name); } } } if (done) { log_fatal(". Not using inodes to detect move operations.\n"); } /* check for disks with changed UUID */ done = 0; for (i = state->disklist; i != 0; i = i->next) { struct snapraid_disk* disk = i->data; /* don't print the message if the UUID changed because before */ /* it was no set. 
*/ /* this is the normal condition for an empty disk because it */ /* isn't stored */ if (disk->has_different_uuid && !disk->had_empty_uuid) { if (!done) { done = 1; log_fatal("WARNING! UUID is changed for disks: '%s'", disk->name); } else { log_fatal(", '%s'", disk->name); } } } if (done) { log_fatal(". Not using inodes to detect move operations.\n"); } /* check for disks with unsupported UUID */ done = 0; for (i = state->disklist; i != 0; i = i->next) { struct snapraid_disk* disk = i->data; if (disk->has_unsupported_uuid) { if (!done) { done = 1; log_fatal("WARNING! UUID is unsupported for disks: '%s'", disk->name); } else { log_fatal(", '%s'", disk->name); } } } if (done) { log_fatal(". Not using inodes to detect move operations.\n"); #if defined(_linux) && !HAVE_BLKID log_fatal("The 'blkid' library is not linked in SnapRAID!\n"); log_fatal("Try rebuilding it after installing the libblkid-dev or libblkid-devel package.\n"); #endif } total.count_equal = 0; total.count_move = 0; total.count_copy = 0; total.count_restore = 0; total.count_change = 0; total.count_remove = 0; total.count_insert = 0; for (i = scanlist; i != 0; i = i->next) { struct snapraid_scan* scan = i->data; total.count_equal += scan->count_equal; total.count_move += scan->count_move; total.count_copy += scan->count_copy; total.count_restore += scan->count_restore; total.count_change += scan->count_change; total.count_remove += scan->count_remove; total.count_insert += scan->count_insert; } if (is_diff) { msg_status("\n"); msg = msg_status; } else { msg = msg_verbose; } msg("%8u equal\n", total.count_equal); msg("%8u added\n", total.count_insert); msg("%8u removed\n", total.count_remove); msg("%8u updated\n", total.count_change); msg("%8u moved\n", total.count_move); msg("%8u copied\n", total.count_copy); msg("%8u restored\n", total.count_restore); log_tag("summary:equal:%u\n", total.count_equal); log_tag("summary:added:%u\n", total.count_insert); log_tag("summary:removed:%u\n", 
total.count_remove); log_tag("summary:updated:%u\n", total.count_change); log_tag("summary:moved:%u\n", total.count_move); log_tag("summary:copied:%u\n", total.count_copy); log_tag("summary:restored:%u\n", total.count_restore); no_difference = !total.count_move && !total.count_copy && !total.count_restore && !total.count_change && !total.count_remove && !total.count_insert; if (is_diff) { if (no_difference) { msg_status("No differences\n"); } else { msg_status("There are differences!\n"); } } if (no_difference) { log_tag("summary:exit:equal\n"); } else { log_tag("summary:exit:diff\n"); } log_flush(); tommy_list_foreach(&scanlist, (tommy_foreach_func*)free); /* check the file-system on all disks */ state_fscheck(state, "after scan"); if (is_diff) { /* check for file difference */ if (!no_difference) return 1; /* check also for incomplete "sync" */ if (parity_is_invalid(state)) return 1; } return 0; } int state_diff(struct snapraid_state* state) { return state_diffscan(state, 1); } void state_scan(struct snapraid_state* state) { (void)state_diffscan(state, 0); /* ignore return value */ }