|
| 1 | +/* |
| 2 | + * This program is free software; you can redistribute it and/or |
| 3 | + * modify it under the terms of the GNU General Public |
| 4 | + * License v2 as published by the Free Software Foundation. |
| 5 | + * |
| 6 | + * This program is distributed in the hope that it will be useful, |
| 7 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 8 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 9 | + * General Public License for more details. |
| 10 | + * |
| 11 | + * You should have received a copy of the GNU General Public |
| 12 | + * License along with this program; if not, write to the |
| 13 | + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 14 | + * Boston, MA 02111-1307, USA. |
| 15 | + */ |
| 16 | + |
| 17 | +#include "kerncompat.h" |
| 18 | +#include "kernel-shared/disk-io.h" |
| 19 | +#include "kernel-shared/ctree.h" |
| 20 | +#include "kernel-shared/volumes.h" |
| 21 | +#include "common/messages.h" |
| 22 | +#include "common/open-utils.h" |
| 23 | +#include "cmds/rescue.h" |
| 24 | + |
| 25 | +/* |
| 26 | + * Record one corrupted data block. |
| 27 | + * |
| 28 | + * We do not report immediately, this is for future file deleting support. |
| 29 | + */ |
| 30 | +struct corrupted_block { |
| 31 | + struct list_head list; |
| 32 | + /* The logical bytenr of the exact corrupted block. */ |
| 33 | + u64 logical; |
| 34 | + |
| 35 | + /* The amount of mirrors above logical have. */ |
| 36 | + unsigned int num_mirrors; |
| 37 | + |
| 38 | + /* |
| 39 | + * Which mirror failed. |
| 40 | + * |
| 41 | + * Note, bit 0 means mirror 1, since mirror 0 means choosing a |
| 42 | + * live mirror, and we never utilized that mirror 0. |
| 43 | + */ |
| 44 | + unsigned long *error_mirror_bitmap; |
| 45 | +}; |
| 46 | + |
/* Repair mode requested by the caller of btrfs_recover_fix_data_checksum(). */
static int global_repair_mode;

/*
 * Every corrupted block recorded so far; new records are appended at the tail.
 * Only used inside this file, so keep it out of the global namespace.
 */
static LIST_HEAD(corrupted_blocks);
| 49 | + |
| 50 | +static int add_corrupted_block(struct btrfs_fs_info *fs_info, u64 logical, |
| 51 | + unsigned int mirror, unsigned int num_mirrors) |
| 52 | +{ |
| 53 | + struct corrupted_block *last; |
| 54 | + |
| 55 | + if (list_empty(&corrupted_blocks)) |
| 56 | + goto add; |
| 57 | + |
| 58 | + last = list_entry(corrupted_blocks.prev, struct corrupted_block, list); |
| 59 | + /* The last entry is the same, just set update the error mirror bitmap. */ |
| 60 | + if (last->logical == logical) { |
| 61 | + UASSERT(last->error_mirror_bitmap); |
| 62 | + set_bit(mirror, last->error_mirror_bitmap); |
| 63 | + return 0; |
| 64 | + } |
| 65 | +add: |
| 66 | + last = calloc(1, sizeof(*last)); |
| 67 | + if (!last) |
| 68 | + return -ENOMEM; |
| 69 | + last->error_mirror_bitmap = calloc(1, BITS_TO_LONGS(num_mirrors)); |
| 70 | + if (!last->error_mirror_bitmap) { |
| 71 | + free(last); |
| 72 | + return -ENOMEM; |
| 73 | + } |
| 74 | + set_bit(mirror - 1, last->error_mirror_bitmap); |
| 75 | + last->logical = logical; |
| 76 | + last->num_mirrors = num_mirrors; |
| 77 | + |
| 78 | + list_add_tail(&last->list, &corrupted_blocks); |
| 79 | + return 0; |
| 80 | +} |
| 81 | + |
| 82 | +/* |
| 83 | + * Verify all mirrors for @logical. |
| 84 | + * |
| 85 | + * If something critical happened, return <0 and should end the run immediately. |
| 86 | + * Otherwise return 0, including data checksum mismatch or read failure. |
| 87 | + */ |
| 88 | +static int verify_one_data_block(struct btrfs_fs_info *fs_info, |
| 89 | + struct extent_buffer *leaf, |
| 90 | + unsigned long leaf_offset, u64 logical, |
| 91 | + unsigned int num_mirrors) |
| 92 | +{ |
| 93 | + const u32 blocksize = fs_info->sectorsize; |
| 94 | + const u32 csum_size = fs_info->csum_size; |
| 95 | + u8 *buf; |
| 96 | + u8 csum[BTRFS_CSUM_SIZE]; |
| 97 | + u8 csum_expected[BTRFS_CSUM_SIZE]; |
| 98 | + int ret = 0; |
| 99 | + |
| 100 | + buf = malloc(blocksize); |
| 101 | + if (!buf) |
| 102 | + return -ENOMEM; |
| 103 | + |
| 104 | + for (int mirror = 1; mirror <= num_mirrors; mirror++) { |
| 105 | + u64 read_len = blocksize; |
| 106 | + |
| 107 | + ret = read_data_from_disk(fs_info, buf, logical, &read_len, mirror); |
| 108 | + if (ret < 0) { |
| 109 | + /* IO error, add one record. */ |
| 110 | + ret = add_corrupted_block(fs_info, logical, mirror, num_mirrors); |
| 111 | + if (ret < 0) |
| 112 | + break; |
| 113 | + } |
| 114 | + /* Verify the data checksum. */ |
| 115 | + btrfs_csum_data(fs_info, fs_info->csum_type, buf, csum, blocksize); |
| 116 | + read_extent_buffer(leaf, csum_expected, leaf_offset, csum_size); |
| 117 | + if (memcmp(csum_expected, csum, csum_size) != 0) { |
| 118 | + ret = add_corrupted_block(fs_info, logical, mirror, num_mirrors); |
| 119 | + if (ret < 0) |
| 120 | + break; |
| 121 | + } |
| 122 | + } |
| 123 | + |
| 124 | + free(buf); |
| 125 | + return ret; |
| 126 | +} |
| 127 | + |
| 128 | +static int iterate_one_csum_item(struct btrfs_fs_info *fs_info, struct btrfs_path *path) |
| 129 | +{ |
| 130 | + struct btrfs_key key; |
| 131 | + const unsigned long item_ptr_off = btrfs_item_ptr_offset(path->nodes[0], |
| 132 | + path->slots[0]); |
| 133 | + const u32 blocksize = fs_info->sectorsize; |
| 134 | + int num_mirrors; |
| 135 | + u64 data_size; |
| 136 | + u64 cur; |
| 137 | + char *buf; |
| 138 | + int ret = 0; |
| 139 | + |
| 140 | + buf = malloc(blocksize); |
| 141 | + if (!buf) |
| 142 | + return -ENOMEM; |
| 143 | + |
| 144 | + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); |
| 145 | + data_size = btrfs_item_size(path->nodes[0], path->slots[0]) / |
| 146 | + fs_info->csum_size * blocksize; |
| 147 | + num_mirrors = btrfs_num_copies(fs_info, key.offset, data_size); |
| 148 | + |
| 149 | + for (cur = 0; cur < data_size; cur += blocksize) { |
| 150 | + const unsigned long leaf_offset = item_ptr_off + |
| 151 | + cur / blocksize * fs_info->csum_size; |
| 152 | + |
| 153 | + ret = verify_one_data_block(fs_info, path->nodes[0], leaf_offset, |
| 154 | + key.offset + cur, num_mirrors); |
| 155 | + if (ret < 0) |
| 156 | + break; |
| 157 | + } |
| 158 | + free(buf); |
| 159 | + return ret; |
| 160 | +} |
| 161 | + |
| 162 | +static int iterate_csum_root(struct btrfs_fs_info *fs_info, struct btrfs_root *csum_root) |
| 163 | +{ |
| 164 | + struct btrfs_path path = { 0 }; |
| 165 | + struct btrfs_key key; |
| 166 | + int ret; |
| 167 | + |
| 168 | + key.objectid = 0; |
| 169 | + key.type = 0; |
| 170 | + key.offset = 0; |
| 171 | + |
| 172 | + ret = btrfs_search_slot(NULL, csum_root, &key, &path, 0, 0); |
| 173 | + if (ret < 0) { |
| 174 | + errno = -ret; |
| 175 | + error("failed to get the first tree block of csum tree: %m"); |
| 176 | + return ret; |
| 177 | + } |
| 178 | + UASSERT(ret > 0); |
| 179 | + while (true) { |
| 180 | + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); |
| 181 | + if (key.type != BTRFS_EXTENT_CSUM_KEY) |
| 182 | + goto next; |
| 183 | + ret = iterate_one_csum_item(fs_info, &path); |
| 184 | + if (ret < 0) |
| 185 | + break; |
| 186 | +next: |
| 187 | + ret = btrfs_next_item(csum_root, &path); |
| 188 | + if (ret > 0) { |
| 189 | + ret = 0; |
| 190 | + break; |
| 191 | + } |
| 192 | + if (ret < 0) { |
| 193 | + errno = -ret; |
| 194 | + error("failed to get next csum item: %m"); |
| 195 | + } |
| 196 | + } |
| 197 | + btrfs_release_path(&path); |
| 198 | + return ret; |
| 199 | +} |
| 200 | + |
| 201 | +static void report_corrupted_blocks(void) |
| 202 | +{ |
| 203 | + struct corrupted_block *entry; |
| 204 | + |
| 205 | + if (list_empty(&corrupted_blocks)) { |
| 206 | + pr_verbose(LOG_DEFAULT, "no data checksum mismatch found\n"); |
| 207 | + return; |
| 208 | + } |
| 209 | + |
| 210 | + list_for_each_entry(entry, &corrupted_blocks, list) { |
| 211 | + bool has_printed = false; |
| 212 | + |
| 213 | + pr_verbose(LOG_DEFAULT, "logical=%llu corrtuped mirrors=", entry->logical); |
| 214 | + /* Open coded bitmap print. */ |
| 215 | + for (int i = 0; i < entry->num_mirrors; i++) { |
| 216 | + if (test_bit(i, entry->error_mirror_bitmap)) { |
| 217 | + if (has_printed) |
| 218 | + pr_verbose(LOG_DEFAULT, ","); |
| 219 | + /* |
| 220 | + * Bit 0 means mirror 1, thus we need to increase |
| 221 | + * the value by 1. |
| 222 | + */ |
| 223 | + pr_verbose(LOG_DEFAULT, "%d", i + 1); |
| 224 | + has_printed=true; |
| 225 | + } |
| 226 | + } |
| 227 | + pr_verbose(LOG_DEFAULT, "\n"); |
| 228 | + } |
| 229 | +} |
| 230 | + |
| 231 | +static void free_corrupted_blocks(void) |
| 232 | +{ |
| 233 | + while (!list_empty(&corrupted_blocks)) { |
| 234 | + struct corrupted_block *entry; |
| 235 | + |
| 236 | + entry = list_entry(corrupted_blocks.next, struct corrupted_block, list); |
| 237 | + list_del_init(&entry->list); |
| 238 | + free(entry->error_mirror_bitmap); |
| 239 | + free(entry); |
| 240 | + } |
| 241 | +} |
| 242 | + |
| 243 | +int btrfs_recover_fix_data_checksum(const char *path, enum btrfs_fix_data_checksum_mode mode) |
| 244 | +{ |
| 245 | + struct btrfs_fs_info *fs_info; |
| 246 | + struct btrfs_root *csum_root; |
| 247 | + struct open_ctree_args oca = { 0 }; |
| 248 | + int ret; |
| 249 | + |
| 250 | + if (mode >= BTRFS_FIX_DATA_CSUMS_LAST) |
| 251 | + return -EINVAL; |
| 252 | + |
| 253 | + ret = check_mounted(path); |
| 254 | + if (ret < 0) { |
| 255 | + errno = -ret; |
| 256 | + error("could not check mount status: %m"); |
| 257 | + return ret; |
| 258 | + } |
| 259 | + if (ret > 0) { |
| 260 | + error("%s is currently mounted", path); |
| 261 | + return -EBUSY; |
| 262 | + } |
| 263 | + |
| 264 | + global_repair_mode = mode; |
| 265 | + oca.filename = path; |
| 266 | + oca.flags = OPEN_CTREE_WRITES; |
| 267 | + fs_info = open_ctree_fs_info(&oca); |
| 268 | + if (!fs_info) { |
| 269 | + error("failed to open btrfs at %s", path); |
| 270 | + return -EIO; |
| 271 | + } |
| 272 | + csum_root = btrfs_csum_root(fs_info, 0); |
| 273 | + if (!csum_root) { |
| 274 | + error("failed to get csum root"); |
| 275 | + ret = -EIO; |
| 276 | + goto out_close; |
| 277 | + } |
| 278 | + ret = iterate_csum_root(fs_info, csum_root); |
| 279 | + if (ret) { |
| 280 | + errno = -ret; |
| 281 | + error("failed to iterate csum tree: %m"); |
| 282 | + } |
| 283 | + report_corrupted_blocks(); |
| 284 | +out_close: |
| 285 | + free_corrupted_blocks(); |
| 286 | + close_ctree_fs_info(fs_info); |
| 287 | + return ret; |
| 288 | +} |
0 commit comments