| /* |
| drbdmeta.c |
| |
| This file is part of DRBD by Philipp Reisner and Lars Ellenberg. |
| |
| Copyright (C) 2004-2008, LINBIT Information Technologies GmbH |
| Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com> |
| Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com> |
| |
| drbd is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| drbd is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with drbd; see the file COPYING. If not, write to |
| the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
| |
| */ |
| |
| /* have the <sys/....h> first, otherwise you get e.g. "redefined" types from |
| * sys/types.h and other weird stuff */ |
| |
| #define _GNU_SOURCE |
| #define _XOPEN_SOURCE 600 |
| #define _FILE_OFFSET_BITS 64 |
| |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sys/ioctl.h> |
| #include <sys/utsname.h> |
| #include <sys/time.h> |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <errno.h> |
| #include <getopt.h> |
| #include <stdbool.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <fcntl.h> |
| #include <time.h> |
| |
| #include <linux/major.h> |
| #include <linux/kdev_t.h> |
| #include <linux/drbd.h> /* only use DRBD_MAGIC from here! */ |
| #include <linux/fs.h> /* for BLKFLSBUF */ |
| |
| #include "drbd_endian.h" |
| #include "drbdtool_common.h" |
| #include "drbd_strings.h" |
| #include "drbd_meta_data.h" |
| |
| #include "drbdmeta_parser.h" |
| |
| #include "config.h" |
| |
/* BLKZEROOUT, available on linux-3.6 and later,
 * and maybe backported to distribution kernels,
 * even if they pretend to be older.
 * Yes, we encountered a number of systems that already had it in their
 * kernels, but not yet in the headers used to build userland stuff like this.
 *
 * The fallback definition below only takes effect if none of the headers
 * included above already defined BLKZEROOUT.
 */
#ifndef BLKZEROOUT
# define BLKZEROOUT _IO(0x12,127)
#endif
| |
/* Input stream and semantic value used by the scanner/parser.
 * YYSTYPE is presumably provided by drbdmeta_parser.h -- confirm. */
extern FILE* yyin;
YYSTYPE yylval;

/* Command line option state; the initializers are the defaults in effect
 * when the corresponding option is not given. */
int force = 0;			/* non-zero: confirmed() does not prompt */
int verbose = 0;		/* >= 1: is_valid_md() reports a missing magic */
int ignore_sanity_checks = 0;	/* set via metaopt[] flag pointer */
int dry_run = 0;		/* set via metaopt[] flag pointer */
int option_peer_max_bio_size = 0;
int option_node_id = -1;
unsigned option_al_stripes = 1;
unsigned option_al_stripe_size_4k = 8;
unsigned option_al_stripes_used = 0;

/* Long option table in getopt_long(3) format.
 * Entries with a non-NULL flag pointer make getopt_long() store the last
 * field (1000) into that variable; for the other entries getopt_long()
 * returns the given short option character.  The all-zero entry terminates
 * the table. */
struct option metaopt[] = {
	{ "ignore-sanity-checks", no_argument, &ignore_sanity_checks, 1000 },
	{ "dry-run", no_argument, &dry_run, 1000 },
	{ "force", no_argument, 0, 'f' },
	{ "verbose", no_argument, 0, 'v' },
	{ "peer-max-bio-size", required_argument, NULL, 'p' },
	{ "node-id", required_argument, NULL, 'i' },
	{ "al-stripes", required_argument, NULL, 's' },
	{ "al-stripe-size-kB", required_argument, NULL, 'z' },
	{ NULL, 0, 0, 0 },
};
| |
| /* FIXME? should use sector_t and off_t, not long/uint64_t ... */ |
| |
/* Note RETURN VALUES:
 * exit code convention: int vXY_something() and meta_blah() return 0 on
 * success, and some negative error code, usually -1, on failure.
 *
 * FIXME some of the "return -1;" should probably rather be exit(something);
 * or should some of the exit() calls rather be a return?
 *
 * AND, the exit codes should follow some defined scheme.
 */
| |
/*
 * Runtime checks.
 *
 * ASSERT(x):   if x is false, print file, line, function and the text of
 *              the expression to stderr, then abort().
 * d_expect(x): evaluates x exactly once and yields its value converted to
 *              int; if that value is zero, the same diagnostic is printed
 *              but execution continues.  Implemented as a GNU statement
 *              expression.
 *
 * Changing "#if 0" to "#if 1" compiles both checks out; d_expect(x) then
 * still evaluates x.
 *
 * __LINE__ expands to an int constant, so it is printed with %d.
 */
#if 0
#define ASSERT(x) ((void)(0))
#define d_expect(x) (x)
#else
#define ASSERT(x) do { if (!(x)) { \
	fprintf(stderr, "%s:%d:%s: ASSERT(%s) failed.\n", \
		__FILE__ , __LINE__ , __func__ , #x ); \
	abort(); } \
	} while (0)
#define d_expect(x) ({ \
	int _x = (x); \
	if (!_x) \
		fprintf(stderr, "%s:%d:%s: ASSERT(%s) failed.\n",\
			__FILE__ , __LINE__ , __func__ , #x ); \
	_x; })
#endif
| |
| static int confirmed(const char *text) |
| { |
| const char yes[] = "yes"; |
| const ssize_t N = sizeof(yes); |
| char *answer = NULL; |
| size_t n = 0; |
| int ok; |
| |
| fprintf(stderr, "\n%s\n", text); |
| |
| if (force) { |
| fprintf(stderr, "*** confirmation forced via --force option ***\n"); |
| ok = 1; |
| } |
| else { |
| fprintf(stderr, "[need to type '%s' to confirm] ", yes); |
| ok = getline(&answer,&n,stdin) == N && |
| strncmp(answer,yes,N-1) == 0; |
| free(answer); |
| fprintf(stderr, "\n"); |
| } |
| return ok; |
| } |
| |
/*
 * FIXME
 *
 * when configuring a drbd device:
 *
 * Require valid drbd meta data at the respective location.  A meta data
 * block would only be created by the drbdmeta command.
 *
 * (How) do we want to implement this: A meta data block contains some
 * reference to the physical device it belongs to.  Refuse to attach meta
 * data that does not correspond to the device.
 *
 * THINK: put a checksum within the on-disk meta data block, too?
 *
 * When asked to create a new meta data block, the drbdmeta command
 * warns loudly if either the data device or the meta data device seems
 * to contain some data, and requires explicit confirmation anyway.
 *
 * See current implementation in check_for_existing_data below.
 *
 * XXX should also be done for meta-data != internal, i.e. refuse to
 * create meta data blocks on a device that seems to be in use for
 * something else.
 *
 * Maybe with an external meta data device, we want to require a "meta
 * data device super block", which could also serve as TOC to the meta
 * data, once we have variable size meta data.  Another option could be a
 * /var/lib/drbd/md-toc plain file, and some magic block on every device
 * that serves as md storage.
 *
 * For certain content on the lower level device, we should always
 * refuse, e.g. refuse to be created on top of an LVM2 physical volume,
 * or on top of swap space.  This would require people to do a
 * "dd if=/dev/zero of=device" first.  Protects them from shooting
 * themselves, and blaming us...
 */
| |
/* The reiserfs super block offset is 64k plus some;
 * align it to 4k, in case someone has an unusual hard sector size (!= 512),
 * otherwise direct io will fail with EINVAL.
 * The value is 68 KiB. */
#define SO_MUCH (68*1024)

/*
 * I think this block of declarations and definitions should be
 * in some common.h, too.
 * {
 */

/* Round x up to the next multiple of a.  The bit trick is only correct when
 * a is a power of two; note that a is evaluated twice. */
#ifndef ALIGN
# define ALIGN(x,a) ( ((x) + (a)-1) &~ ((a)-1) )
#endif

/* Layout values in units of 512 byte sectors (MD_BM_MAX_BYTE_07 converts to
 * bytes by multiplying with 512). */
#define MD_AL_OFFSET_07 8
#define MD_AL_MAX_SECT_07 64
/* = 72: bitmap starts right behind the activity log area */
#define MD_BM_OFFSET_07 (MD_AL_OFFSET_07 + MD_AL_MAX_SECT_07)
/* 128 << 11 = 262144 sectors, i.e. 128 MiB */
#define MD_RESERVED_SECT_07 ( (uint64_t)(128ULL << 11) )
#define MD_BM_MAX_BYTE_07 ( (uint64_t)(MD_RESERVED_SECT_07 - MD_BM_OFFSET_07)*512 )
/* 2^29 bytes when BITS_PER_LONG is 32, otherwise 2^35 bytes.
 * BITS_PER_LONG is not defined in this file; if no included header defines
 * it, the preprocessor treats it as 0 and the #else branch is used. */
#if BITS_PER_LONG == 32
#define MD_BM_MAX_BYTE_FLEX ( (uint64_t)(1ULL << (32-3)) )
#else
#define MD_BM_MAX_BYTE_FLEX ( (uint64_t)(1ULL << (38-3)) )
#endif

/* bytes of storage covered by one bitmap bit by default: 4096 */
#define DEFAULT_BM_BLOCK_SIZE (1<<12)

/* Magic numbers of the meta data "super block", one per on-disk format,
 * all derived from DRBD_MAGIC.  is_valid_md() accepts both
 * DRBD_MD_MAGIC_08 and DRBD_MD_MAGIC_84_UNCLEAN for the v08 format. */
#define DRBD_MD_MAGIC_06 (DRBD_MAGIC+2)
#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3)
#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4)
#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5)
#define DRBD_MD_MAGIC_09 (DRBD_MAGIC+6)

/*
 * }
 * end of should-be-shared
 */
| |
/*
 * global variables and data types
 */

/* buffer_size has to be a multiple of 4096, and at least 32k.
 * Pending a "nice" implementation of replay_al_84 for striped activity log,
 * I chose a big buffer hopefully large enough to hold the whole activity log,
 * even with "large" number of stripes and stripe sizes.
 *
 * If you choose to change buffer_size, double check also fprintf_bm(),
 * and how it calculates its chunk size.
 */
const size_t buffer_size = 32 * 1024 * 1024;	/* 32 MiB */
size_t pagesize; /* = sysconf(_SC_PAGESIZE) */
int opened_odirect = 1;
/* NOTE(review): presumably a buffer of buffer_size bytes used for all disk
 * io; allocation is not visible here -- confirm. */
void *on_disk_buffer = NULL;
/* NOTE(review): presumably copies of main()'s argc/argv -- confirm. */
int global_argc;
char **global_argv;
char *progname = NULL;
| |
/* Known on-disk meta data formats.
 * The values DRBD_V06 .. DRBD_V09 are used as indices into f_ops[];
 * DRBD_UNKNOWN has no entry in that table. */
enum md_format {
	DRBD_V06,
	DRBD_V07,
	DRBD_V08,
	DRBD_V09,
	DRBD_UNKNOWN,
};
| |
/* let gcc help us get it right.
 * some explicit endian types:
 * each wraps a plain integer in a one-member struct whose member name
 * (.le / .be) states the byte order of the stored value, so that a raw
 * on-disk value cannot be assigned to a cpu-order integer without going
 * through the member explicitly. */
typedef struct { uint64_t le; } le_u64;
typedef struct { uint64_t be; } be_u64;
typedef struct { uint32_t le; } le_u32;
typedef struct { uint32_t be; } be_u32;
typedef struct { int32_t be; } be_s32;
typedef struct { uint16_t be; } be_u16;
typedef struct { unsigned long le; } le_ulong;
typedef struct { unsigned long be; } be_ulong;
| |
/* NOTE that this structure does not need to be packed,
 * aligned, nor does it need to be in the same order as the on_disk variants.
 *
 * Per-peer part of the meta data, in cpu byte order.
 */
struct peer_md_cpu {
	uint64_t bitmap_uuid;
	uint64_t bitmap_dagtag;
	uint32_t flags;
	int32_t bitmap_index;	/* is_valid_md() accepts -1 .. DRBD_PEERS_MAX + 1 */
};
| |
/* Format independent, cpu byte order representation of the meta data
 * "super block".  The md_disk_XX_to_cpu() / md_cpu_to_disk_XX() helpers
 * convert between this and the per-format on-disk structures; members a
 * given format does not know are zeroed or set to defaults by the
 * md_disk_XX_to_cpu() helper of that format. */
struct md_cpu {
	uint64_t current_uuid;
	uint64_t history_uuids[HISTORY_UUIDS];
	/* present since drbd 0.6 */
	uint32_t gc[GEN_CNT_SIZE];	/* generation counter */
	uint32_t magic;
	/* added in drbd 0.7;
	 * 0.7 stores effective_size on disk as kb, 0.8 in units of sectors.
	 * we use sectors in our general working structure here */
	uint64_t effective_size;	/* last agreed size */
	uint32_t md_size_sect;
	int32_t al_offset;		/* signed sector offset to this block */
	uint32_t al_nr_extents;		/* important for restoring the AL */
	int32_t bm_offset;		/* signed sector offset to the bitmap, from here */
	/* Since DRBD 0.8 we have uuid instead of gc */
	uint32_t flags;
	uint64_t device_uuid;
	uint32_t bm_bytes_per_bit;
	uint32_t la_peer_max_bio_size;
	/* Since DRBD 9.0 the following new stuff: */
	uint32_t max_peers;
	int32_t node_id;
	struct peer_md_cpu peers[DRBD_PEERS_MAX];
	/* activity log striping; is_valid_md() computes the AL size in
	 * sectors as al_stripes * al_stripe_size_4k * 8 */
	uint32_t al_stripes;
	uint32_t al_stripe_size_4k;
};
| |
| /* |
| * drbdmeta specific types |
| */ |
| |
| struct format_ops; |
| |
| struct format { |
| const struct format_ops *ops; |
| char *md_device_name; /* well, in 06 it is file name */ |
| char *drbd_dev_name; |
| unsigned minor; /* cache, determined from drbd_dev_name */ |
| int lock_fd; |
| int drbd_fd; /* no longer used! */ |
| int ll_fd; /* not yet used here */ |
| int md_fd; |
| int md_hard_sect_size; |
| |
| |
| /* unused in 06 */ |
| int md_index; |
| unsigned int bm_bytes; |
| unsigned int bits_set; /* 32 bit should be enough. @4k ==> 16TB */ |
| int bits_counted:1; |
| int update_lk_bdev:1; /* need to update the last known bdev info? */ |
| |
| struct md_cpu md; |
| |
| /* _byte_ offsets of our "super block" and other data, within fd */ |
| uint64_t md_offset; |
| uint64_t al_offset; |
| uint64_t bm_offset; |
| |
| /* if create_md actually does convert, |
| * we want to wipe the old meta data block _after_ convertion. */ |
| uint64_t wipe_fixed; |
| uint64_t wipe_flex; |
| uint64_t wipe_resize; |
| |
| /* convenience */ |
| uint64_t bd_size; /* size of block device for internal meta data */ |
| |
| /* size limit due to available on-disk bitmap */ |
| uint64_t max_usable_sect; |
| |
| /* last-known bdev info, |
| * to increase the chance of finding internal meta data in case the |
| * lower level device has been resized without telling DRBD. |
| * Loaded from file for internal metadata */ |
| struct bdev_info lk_bd; |
| }; |
| |
/* Per-format operations; one instance per on-disk format lives in f_ops[].
 *
 * - parse is expected to exit() if it does not work out.
 * - open is expected to read the respective on_disk members,
 *   and copy the "superblock" meta data into the struct md_cpu
 * FIXME describe rest of them, and when they should exit,
 * return error or success.
 */
struct format_ops {
	const char *name;	/* "v06", "v07", ...; used in messages */
	char **args;		/* NULL terminated list of argument names */
	int (*parse) (struct format *, char **, int, int *);
	int (*open) (struct format *);
	int (*close) (struct format *);
	int (*md_initialize) (struct format *, int do_disk_writes, int max_peers);
	int (*md_disk_to_cpu) (struct format *);
	int (*md_cpu_to_disk) (struct format *);
	/* "gi": generation counters for v06/v07, uuids for v08/v09
	 * (see the handlers assigned in f_ops[]) */
	void (*get_gi) (struct md_cpu *md, int node_id);
	void (*show_gi) (struct md_cpu *md, int node_id);
	void (*set_gi) (struct md_cpu *md, int node_id, char **argv, int argc);
	int (*outdate_gi) (struct md_cpu *md);
	int (*invalidate_gi) (struct md_cpu *md);
};

/* Tentative declaration; the table itself is defined further down, after
 * the handlers have been declared.  Needed here because is_valid_md()
 * looks up the format name in it. */
struct format_ops f_ops[];
/*
 * -- DRBD 0.6 --------------------------------------
 */

/* On-disk meta data of the 0.6 format: only the generation counters and
 * the magic, all stored big endian. */
struct __packed md_on_disk_06 {
	be_u32 gc[GEN_CNT_SIZE];	/* generation counter */
	be_u32 magic;
};
| |
| void md_disk_06_to_cpu(struct md_cpu *cpu, const struct md_on_disk_06 *disk) |
| { |
| int i; |
| |
| memset(cpu, 0, sizeof(*cpu)); |
| for (i = 0; i < GEN_CNT_SIZE; i++) |
| cpu->gc[i] = be32_to_cpu(disk->gc[i].be); |
| cpu->magic = be32_to_cpu(disk->magic.be); |
| cpu->max_peers = 1; |
| } |
| |
| void md_cpu_to_disk_06(struct md_on_disk_06 *disk, struct md_cpu *cpu) |
| { |
| int i; |
| |
| for (i = 0; i < GEN_CNT_SIZE; i++) |
| disk->gc[i].be = cpu_to_be32(cpu->gc[i]); |
| disk->magic.be = cpu_to_be32(cpu->magic); |
| } |
| |
| int v06_validate_md(struct format *cfg) |
| { |
| if (cfg->md.magic != DRBD_MD_MAGIC_06) { |
| fprintf(stderr, "v06 Magic number not found\n"); |
| return -1; |
| } |
| return 0; |
| } |
| |
/*
 * -- DRBD 0.7 --------------------------------------
 */
/* defined later in this file; is_valid_md() uses it to compute the expected
 * size of the bitmap area */
unsigned long bm_bytes(const struct md_cpu * const md, uint64_t sectors);

/* On-disk meta data "super block" of the 0.7 format, all members big endian.
 * The named members add up to 48 bytes (with GEN_CNT_SIZE == 5, which the
 * size of reserved[] implies); reserved[] pads the structure to
 * 8 * 512 bytes. */
struct __packed md_on_disk_07 {
	be_u64 la_kb;			/* last agreed size. */
	be_u32 gc[GEN_CNT_SIZE];	/* generation counter */
	be_u32 magic;
	be_u32 md_size_kb;
	be_s32 al_offset;		/* signed sector offset to this block */
	be_u32 al_nr_extents;		/* important for restoring the AL */
	be_s32 bm_offset;		/* signed sector offset to the bitmap, from here */
	char reserved[8 * 512 - 48];
};
| |
| void md_disk_07_to_cpu(struct md_cpu *cpu, const struct md_on_disk_07 *disk) |
| { |
| int i; |
| |
| memset(cpu, 0, sizeof(*cpu)); |
| cpu->effective_size = be64_to_cpu(disk->la_kb.be) << 1; |
| for (i = 0; i < GEN_CNT_SIZE; i++) |
| cpu->gc[i] = be32_to_cpu(disk->gc[i].be); |
| cpu->magic = be32_to_cpu(disk->magic.be); |
| cpu->md_size_sect = be32_to_cpu(disk->md_size_kb.be) << 1; |
| cpu->al_offset = be32_to_cpu(disk->al_offset.be); |
| cpu->al_nr_extents = be32_to_cpu(disk->al_nr_extents.be); |
| cpu->bm_offset = be32_to_cpu(disk->bm_offset.be); |
| cpu->bm_bytes_per_bit = DEFAULT_BM_BLOCK_SIZE; |
| cpu->max_peers = 1; |
| cpu->al_stripes = 1; |
| cpu->al_stripe_size_4k = 8; |
| } |
| |
| void md_cpu_to_disk_07(struct md_on_disk_07 *disk, const struct md_cpu * const cpu) |
| { |
| int i; |
| |
| disk->la_kb.be = cpu_to_be64(cpu->effective_size >> 1); |
| for (i = 0; i < GEN_CNT_SIZE; i++) |
| disk->gc[i].be = cpu_to_be32(cpu->gc[i]); |
| disk->magic.be = cpu_to_be32(cpu->magic); |
| disk->md_size_kb.be = cpu_to_be32(cpu->md_size_sect >> 1); |
| disk->al_offset.be = cpu_to_be32(cpu->al_offset); |
| disk->al_nr_extents.be = cpu_to_be32(cpu->al_nr_extents); |
| disk->bm_offset.be = cpu_to_be32(cpu->bm_offset); |
| memset(disk->reserved, 0, sizeof(disk->reserved)); |
| } |
| |
| int is_valid_md(enum md_format f, |
| const struct md_cpu * const md, const int md_index, const uint64_t ll_size) |
| { |
| uint64_t md_size_sect; |
| const char *v = f_ops[f].name; |
| int al_size_sect; |
| int n; |
| |
| ASSERT(f == DRBD_V07 || f == DRBD_V08 || f == DRBD_V09); |
| |
| if ((f == DRBD_V07 && md->magic != DRBD_MD_MAGIC_07) || |
| (f == DRBD_V08 && md->magic != DRBD_MD_MAGIC_08 |
| && md->magic != DRBD_MD_MAGIC_84_UNCLEAN) || |
| (f == DRBD_V09 && md->magic != DRBD_MD_MAGIC_09)) { |
| if (verbose >= 1) |
| fprintf(stderr, "%s Magic number not found\n", v); |
| return 0; |
| } |
| |
| if (md->max_peers < 1 || md->max_peers > DRBD_PEERS_MAX) { |
| fprintf(stderr, "%s max-peers value %d out of bounds\n", |
| v, md->max_peers); |
| return 0; |
| } |
| if (md->node_id < -1 || md->node_id > DRBD_PEERS_MAX + 1) { |
| fprintf(stderr, "%s device node-id value %d out of bounds\n", |
| v, md->node_id); |
| return 0; |
| } |
| for (n = 0; n < md->max_peers; n++) { |
| if (md->peers[n].bitmap_index < -1 || md->peers[n].bitmap_index > DRBD_PEERS_MAX + 1) { |
| fprintf(stderr, "%s peer device %d node-id value %d out of bounds\n", |
| v, n, md->peers[n].bitmap_index); |
| return 0; |
| } |
| } |
| |
| al_size_sect = md->al_stripes * md->al_stripe_size_4k * 8; |
| |
| switch(md_index) { |
| default: |
| case DRBD_MD_INDEX_INTERNAL: |
| case DRBD_MD_INDEX_FLEX_EXT: |
| if (md->al_offset != MD_AL_OFFSET_07) { |
| fprintf(stderr, "%s Magic number (al_offset) not found\n", v); |
| fprintf(stderr, "\texpected: %d, found %d\n", |
| MD_AL_OFFSET_07, md->al_offset); |
| return 0; |
| } |
| if (md->bm_offset != MD_AL_OFFSET_07 + al_size_sect) { |
| fprintf(stderr, "%s bm_offset: expected %d, found %d\n", v, |
| MD_AL_OFFSET_07 + al_size_sect, md->bm_offset); |
| return 0; |
| } |
| break; |
| case DRBD_MD_INDEX_FLEX_INT: |
| if (md->al_offset != -al_size_sect) { |
| fprintf(stderr, "%s al_offset: expected %d, found %d\n", v, |
| -al_size_sect, md->al_offset); |
| return 0; |
| } |
| |
| md_size_sect = bm_bytes(md, ll_size >> 9) >> 9; |
| md_size_sect = ALIGN(md_size_sect, 8); /* align on 4K blocks */ |
| /* plus the "drbd meta data super block", |
| * and the activity log; unit still sectors */ |
| md_size_sect += MD_AL_OFFSET_07 + al_size_sect; |
| |
| if (md->bm_offset != -(int64_t)md_size_sect + MD_AL_OFFSET_07) { |
| fprintf(stderr, "strange bm_offset %d (expected: "D64")\n", |
| md->bm_offset, -(int64_t)md_size_sect + MD_AL_OFFSET_07); |
| return 0; |
| }; |
| if (md->md_size_sect != md_size_sect) { |
| fprintf(stderr, "strange md_size_sect %u (expected: "U64")\n", |
| md->md_size_sect, md_size_sect); |
| if (f == DRBD_V08) return 0; |
| /* else not an error, |
| * was inconsistently implemented in v07 */ |
| } |
| break; |
| } |
| |
| /* FIXME consistency check, effevtive_size < ll_device_size, |
| * no overlap with internal meta data, |
| * no overlap of flexible meta data offsets/sizes |
| * ... |
| */ |
| |
| return 1; /* VALID */ |
| } |
| |
/*
 * these stay the same for 0.8, too:
 */

/* One activity log transaction sector in cpu byte order,
 * filled in by v07_al_disk_to_cpu(). */
struct al_sector_cpu {
	uint32_t magic;
	uint32_t tr_number;
	struct {
		uint32_t pos;
		uint32_t extent;
	} updates[62];
	uint32_t xor_sum;	/* as read from disk, not recomputed */
};

/* The same transaction as stored on disk, all members big endian.
 * 4 + 4 + 62 * 8 + 4 + 4 = 512 bytes. */
struct __packed al_sector_on_disk {
	be_u32 magic;
	be_u32 tr_number;
	struct __packed {
		be_u32 pos;
		be_u32 extent;
	} updates[62];
	be_u32 xor_sum;
	be_u32 pad;
};
| |
| int v07_al_disk_to_cpu(struct al_sector_cpu *al_cpu, struct al_sector_on_disk *al_disk) |
| { |
| uint32_t xor_sum = 0; |
| int i; |
| al_cpu->magic = be32_to_cpu(al_disk->magic.be); |
| al_cpu->tr_number = be32_to_cpu(al_disk->tr_number.be); |
| for (i = 0; i < 62; i++) { |
| al_cpu->updates[i].pos = be32_to_cpu(al_disk->updates[i].pos.be); |
| al_cpu->updates[i].extent = be32_to_cpu(al_disk->updates[i].extent.be); |
| xor_sum ^= al_cpu->updates[i].extent; |
| } |
| al_cpu->xor_sum = be32_to_cpu(al_disk->xor_sum.be); |
| return al_cpu->magic == DRBD_MAGIC && |
| al_cpu->xor_sum == xor_sum; |
| } |
| |
/*
 * -- DRBD 8.0, 8.2, 8.3 --------------------------------------
 */

/* On-disk meta data "super block" of the 0.8 format, all members big endian.
 * It holds UI_SIZE + 3 64-bit members and 11 32-bit members; reserved[]
 * pads the structure to 8 * 512 bytes. */
struct __packed md_on_disk_08 {
	be_u64 effective_size;	/* last agreed size. */
	be_u64 uuid[UI_SIZE];	// UUIDs.
	be_u64 device_uuid;
	be_u64 reserved_u64_1;
	be_u32 flags;
	be_u32 magic;
	be_u32 md_size_sect;
	be_s32 al_offset;	/* signed sector offset to this block */
	be_u32 al_nr_extents;	/* important for restoring the AL */
	be_s32 bm_offset;	/* signed sector offset to the bitmap, from here */
	be_u32 bm_bytes_per_bit;
	be_u32 la_peer_max_bio_size; /* last peer max_bio_size */

	/* see al_tr_number_to_on_disk_sector();
	 * both zero means "not set", see md_disk_08_to_cpu() */
	be_u32 al_stripes;
	be_u32 al_stripe_size_4k;

	be_u32 reserved_u32[1];

	char reserved[8 * 512 - (8*(UI_SIZE+3)+4*11)];
};
| |
| void md_disk_08_to_cpu(struct md_cpu *cpu, const struct md_on_disk_08 *disk) |
| { |
| int i; |
| |
| memset(cpu, 0, sizeof(*cpu)); |
| cpu->effective_size = be64_to_cpu(disk->effective_size.be); |
| cpu->current_uuid = be64_to_cpu(disk->uuid[UI_CURRENT].be); |
| cpu->peers[0].bitmap_uuid = be64_to_cpu(disk->uuid[UI_BITMAP].be); |
| for (i = 0; i < HISTORY_UUIDS_V08; i++) |
| cpu->history_uuids[i] = |
| be64_to_cpu(disk->uuid[UI_HISTORY_START + i].be); |
| cpu->device_uuid = be64_to_cpu(disk->device_uuid.be); |
| cpu->flags = be32_to_cpu(disk->flags.be); |
| cpu->magic = be32_to_cpu(disk->magic.be); |
| cpu->md_size_sect = be32_to_cpu(disk->md_size_sect.be); |
| cpu->al_offset = be32_to_cpu(disk->al_offset.be); |
| cpu->al_nr_extents = be32_to_cpu(disk->al_nr_extents.be); |
| cpu->bm_offset = be32_to_cpu(disk->bm_offset.be); |
| cpu->bm_bytes_per_bit = be32_to_cpu(disk->bm_bytes_per_bit.be); |
| cpu->la_peer_max_bio_size = be32_to_cpu(disk->la_peer_max_bio_size.be); |
| cpu->max_peers = 1; |
| cpu->al_stripes = be32_to_cpu(disk->al_stripes.be); |
| cpu->al_stripe_size_4k = be32_to_cpu(disk->al_stripe_size_4k.be); |
| |
| /* not set? --> default to old fixed size activity log */ |
| if (cpu->al_stripes == 0 && cpu->al_stripe_size_4k == 0) { |
| cpu->al_stripes = 1; |
| cpu->al_stripe_size_4k = 8; |
| } |
| } |
| |
| void md_cpu_to_disk_08(struct md_on_disk_08 *disk, const struct md_cpu *cpu) |
| { |
| int i; |
| |
| memset(disk, 0, sizeof(*disk)); |
| |
| disk->effective_size.be = cpu_to_be64(cpu->effective_size); |
| disk->uuid[UI_CURRENT].be = cpu_to_be64(cpu->current_uuid); |
| disk->uuid[UI_BITMAP].be = cpu_to_be64(cpu->peers[0].bitmap_uuid); |
| for (i = 0; i < HISTORY_UUIDS_V08; i++) |
| disk->uuid[UI_HISTORY_START + i].be = |
| cpu_to_be64(cpu->history_uuids[i]); |
| disk->device_uuid.be = cpu_to_be64(cpu->device_uuid); |
| disk->flags.be = cpu_to_be32(cpu->flags); |
| disk->magic.be = cpu_to_be32(cpu->magic); |
| disk->md_size_sect.be = cpu_to_be32(cpu->md_size_sect); |
| disk->al_offset.be = cpu_to_be32(cpu->al_offset); |
| disk->al_nr_extents.be = cpu_to_be32(cpu->al_nr_extents); |
| disk->bm_offset.be = cpu_to_be32(cpu->bm_offset); |
| disk->bm_bytes_per_bit.be = cpu_to_be32(cpu->bm_bytes_per_bit); |
| disk->la_peer_max_bio_size.be = cpu_to_be32(cpu->la_peer_max_bio_size); |
| disk->al_stripes.be = cpu_to_be32(cpu->al_stripes); |
| disk->al_stripe_size_4k.be = cpu_to_be32(cpu->al_stripe_size_4k); |
| } |
| |
/*
 * -- DRBD 8.4 --------------------------------------
 */

/* new in 8.4: 4k al transaction blocks */
/* number of update slots, and of context entries, in one 4k transaction;
 * these are the array sizes in struct al_4k_transaction_on_disk */
#define AL_UPDATES_PER_TRANSACTION 64
#define AL_CONTEXT_PER_TRANSACTION 919
/* from DRBD 8.4 linux/drbd/drbd_limits.h, DRBD_AL_EXTENTS_MAX */
#define AL_EXTENTS_MAX  65534
/* values of the transaction_type member of a 4k transaction */
enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* One 4k activity log transaction block as stored on disk (8.4 and later).
 * All members are big endian; the members add up to exactly 4096 bytes,
 * see the running totals in the comments below. */
struct __packed al_4k_transaction_on_disk {
	/* don't we all like magic */
	be_u32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	be_u32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	be_u32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	be_u16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	be_u16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	be_u16	context_size;

	/* slot number the context starts with */
	be_u16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	be_u32	__reserved[4];

	/* --- 36 byte used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining byte in the 4k block for
	 * context information.  "Flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyways, so it would only complicate code without
	 * additional benefit.
	 */
	be_u16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
	be_u32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	be_u32	context[AL_CONTEXT_PER_TRANSACTION];
};
| |
/* A 4k activity log transaction in cpu byte order, as produced by
 * v84_al_disk_to_cpu().  Same members as the on-disk structure, plus
 * is_valid. */
struct al_4k_cpu {
	uint32_t	magic;
	uint32_t	tr_number;
	uint32_t	crc32c;		/* checksum as stored on disk */
	uint16_t	transaction_type;
	uint16_t	n_updates;
	uint16_t	context_size;
	uint16_t	context_start_slot_nr;
	uint32_t	__reserved[4];	/* not filled in by v84_al_disk_to_cpu() */
	uint16_t	update_slot_nr[AL_UPDATES_PER_TRANSACTION];
	uint32_t	update_extent_nr[AL_UPDATES_PER_TRANSACTION];
	uint32_t	context[AL_CONTEXT_PER_TRANSACTION];
	uint32_t	is_valid;	/* magic and checksum matched */
};
| |
| |
| /* --- */ |
| |
| int v84_al_disk_to_cpu(struct al_4k_cpu *al_cpu, struct al_4k_transaction_on_disk *al_disk) |
| { |
| unsigned crc = 0; |
| unsigned i; |
| |
| al_cpu->magic = be32_to_cpu(al_disk->magic.be); |
| al_cpu->tr_number = be32_to_cpu(al_disk->tr_number.be); |
| al_cpu->crc32c = be32_to_cpu(al_disk->crc32c.be); |
| al_cpu->transaction_type = be16_to_cpu(al_disk->transaction_type.be); |
| al_cpu->n_updates = be16_to_cpu(al_disk->n_updates.be); |
| al_cpu->context_size = be16_to_cpu(al_disk->context_size.be); |
| al_cpu->context_start_slot_nr = be16_to_cpu(al_disk->context_start_slot_nr.be); |
| |
| /* reserverd al_disk->__reserved[4] */ |
| |
| for (i=0; i < AL_UPDATES_PER_TRANSACTION; i++) |
| al_cpu->update_slot_nr[i] = be16_to_cpu(al_disk->update_slot_nr[i].be); |
| for (i=0; i < AL_UPDATES_PER_TRANSACTION; i++) |
| al_cpu->update_extent_nr[i] = be32_to_cpu(al_disk->update_extent_nr[i].be); |
| for (i=0; i < AL_CONTEXT_PER_TRANSACTION; i++) |
| al_cpu->context[i] = be32_to_cpu(al_disk->context[i].be); |
| |
| al_disk->crc32c.be = 0; |
| crc = crc32c(crc, (void*)al_disk, 4096); |
| al_cpu->is_valid = (al_cpu->magic == DRBD_AL_MAGIC && al_cpu->crc32c == crc); |
| return al_cpu->is_valid; |
| } |
| |
| /* |
| * -- DRBD 9.0 -------------------------------------- |
| */ |
| /* struct meta_data_on_disk_9 is in drbd_meta_data.h */ |
| |
| void md_disk_09_to_cpu(struct md_cpu *cpu, const struct meta_data_on_disk_9 *disk) |
| { |
| int p, i; |
| |
| memset(cpu, 0, sizeof(*cpu)); |
| cpu->effective_size = be64_to_cpu(disk->effective_size.be); |
| cpu->device_uuid = be64_to_cpu(disk->device_uuid.be); |
| cpu->flags = be32_to_cpu(disk->flags.be); |
| cpu->magic = be32_to_cpu(disk->magic.be); |
| cpu->md_size_sect = be32_to_cpu(disk->md_size_sect.be); |
| cpu->al_offset = be32_to_cpu(disk->al_offset.be); |
| cpu->al_nr_extents = be32_to_cpu(disk->al_nr_extents.be); |
| cpu->bm_offset = be32_to_cpu(disk->bm_offset.be); |
| cpu->bm_bytes_per_bit = be32_to_cpu(disk->bm_bytes_per_bit.be); |
| cpu->la_peer_max_bio_size = be32_to_cpu(disk->la_peer_max_bio_size.be); |
| cpu->max_peers = be32_to_cpu(disk->bm_max_peers.be); |
| cpu->node_id = be32_to_cpu(disk->node_id.be); |
| cpu->al_stripes = be32_to_cpu(disk->al_stripes.be); |
| cpu->al_stripe_size_4k = be32_to_cpu(disk->al_stripe_size_4k.be); |
| |
| if (cpu->max_peers > DRBD_PEERS_MAX) |
| cpu->max_peers = DRBD_PEERS_MAX; |
| |
| cpu->current_uuid = be64_to_cpu(disk->current_uuid.be); |
| for (p = 0; p < DRBD_NODE_ID_MAX; p++) { |
| cpu->peers[p].flags = be32_to_cpu(disk->peers[p].flags.be); |
| cpu->peers[p].bitmap_index = be32_to_cpu(disk->peers[p].bitmap_index.be); |
| cpu->peers[p].bitmap_uuid = |
| be64_to_cpu(disk->peers[p].bitmap_uuid.be); |
| cpu->peers[p].bitmap_dagtag = |
| be64_to_cpu(disk->peers[p].bitmap_dagtag.be); |
| |
| } |
| BUILD_BUG_ON(ARRAY_SIZE(cpu->history_uuids) != ARRAY_SIZE(disk->history_uuids)); |
| for (i = 0; i < ARRAY_SIZE(cpu->history_uuids); i++) |
| cpu->history_uuids[i] = be64_to_cpu(disk->history_uuids[i].be); |
| } |
| |
| void md_cpu_to_disk_09(struct meta_data_on_disk_9 *disk, const struct md_cpu *cpu) |
| { |
| int p, i; |
| |
| memset(disk, 0, sizeof(*disk)); |
| disk->effective_size.be = cpu_to_be64(cpu->effective_size); |
| disk->device_uuid.be = cpu_to_be64(cpu->device_uuid); |
| disk->flags.be = cpu_to_be32(cpu->flags); |
| disk->magic.be = cpu_to_be32(cpu->magic); |
| disk->md_size_sect.be = cpu_to_be32(cpu->md_size_sect); |
| disk->al_offset.be = cpu_to_be32(cpu->al_offset); |
| disk->al_nr_extents.be = cpu_to_be32(cpu->al_nr_extents); |
| disk->bm_offset.be = cpu_to_be32(cpu->bm_offset); |
| disk->bm_bytes_per_bit.be = cpu_to_be32(cpu->bm_bytes_per_bit); |
| disk->la_peer_max_bio_size.be = cpu_to_be32(cpu->la_peer_max_bio_size); |
| disk->bm_max_peers.be = cpu_to_be32(cpu->max_peers); |
| disk->node_id.be = cpu_to_be32(cpu->node_id); |
| disk->al_stripes.be = cpu_to_be32(cpu->al_stripes); |
| disk->al_stripe_size_4k.be = cpu_to_be32(cpu->al_stripe_size_4k); |
| |
| disk->current_uuid.be = cpu_to_be64(cpu->current_uuid); |
| for (p = 0; p < DRBD_NODE_ID_MAX; p++) { |
| disk->peers[p].flags.be = cpu_to_be32(cpu->peers[p].flags); |
| disk->peers[p].bitmap_index.be = cpu_to_be32(cpu->peers[p].bitmap_index); |
| disk->peers[p].bitmap_uuid.be = |
| cpu_to_be64(cpu->peers[p].bitmap_uuid); |
| disk->peers[p].bitmap_dagtag.be = |
| cpu_to_be64(cpu->peers[p].bitmap_dagtag); |
| |
| } |
| BUILD_BUG_ON(ARRAY_SIZE(disk->history_uuids) != ARRAY_SIZE(cpu->history_uuids)); |
| for (i = 0; i < ARRAY_SIZE(disk->history_uuids); i++) |
| disk->history_uuids[i].be = cpu_to_be64(cpu->history_uuids[i]); |
| } |
| |
/*
 * --------------------------------------------------
 */

/* pre declarations:
 * everything below is defined later in this file and is declared here so
 * that the f_ops[] table can reference it. */

/* generation counter ("gc") handlers, used by the v06 and v07 entries */
void m_get_gc(struct md_cpu *md, int node_id);
void m_show_gc(struct md_cpu *md, int node_id);
void m_set_gc(struct md_cpu *md, int node_id, char **argv, int argc);
int m_outdate_gc(struct md_cpu *md);
int m_invalidate_gc(struct md_cpu *md);
/* uuid handlers, used by the v08 entry */
void m_get_uuid(struct md_cpu *md, int node_id);
void m_show_uuid(struct md_cpu *md, int node_id);
void m_set_uuid(struct md_cpu *md, int node_id, char **argv, int argc);
/* uuid handlers, used by the v09 entry */
void m_get_v9_uuid(struct md_cpu *md, int node_id);
void m_show_v9_uuid(struct md_cpu *md, int node_id);
void m_set_v9_uuid(struct md_cpu *md, int node_id, char **argv, int argc);
/* m_outdate_uuid is shared by the v08 and v09 entries */
int m_outdate_uuid(struct md_cpu *md);
int m_invalidate_uuid(struct md_cpu *md);
int m_invalidate_v9_uuid(struct md_cpu *md);

/* close handler of the v06 and v07 entries */
int generic_md_close(struct format *cfg);

int v06_md_cpu_to_disk(struct format *cfg);
int v06_md_disk_to_cpu(struct format *cfg);
int v06_parse(struct format *cfg, char **argv, int argc, int *ai);
int v06_md_open(struct format *cfg);
int v06_md_initialize(struct format *cfg, int do_disk_writes, int max_peers);

int v07_md_cpu_to_disk(struct format *cfg);
int v07_md_disk_to_cpu(struct format *cfg);
/* v07_parse is also the parse handler of the v08 and v09 entries */
int v07_parse(struct format *cfg, char **argv, int argc, int *ai);
int v07_md_initialize(struct format *cfg, int do_disk_writes, int max_peers);

int v07_style_md_open(struct format *cfg);

/* v08_md_open and v08_md_close are shared by the v08 and v09 entries */
int v08_md_open(struct format *cfg);
int v08_md_cpu_to_disk(struct format *cfg);
int v08_md_disk_to_cpu(struct format *cfg);
int v08_md_initialize(struct format *cfg, int do_disk_writes, int max_peers);
int v08_md_close(struct format *cfg);

int v09_md_disk_to_cpu(struct format *cfg);
int v09_md_cpu_to_disk(struct format *cfg);
int v09_md_initialize(struct format *cfg, int do_disk_writes, int max_peers);
| |
/* return codes for md_open:
 * zero means valid meta data was found; both negative values are distinct,
 * so callers that care must compare against the specific constant rather
 * than just testing for "< 0". */
enum {
	VALID_MD_FOUND = 0,
	NO_VALID_MD_FOUND = -1,
	VALID_MD_FOUND_AT_LAST_KNOWN_LOCATION = -2,
};
| |
| struct format_ops f_ops[] = { |
| [DRBD_V06] = { |
| .name = "v06", |
| .args = (char *[]){"minor", NULL}, |
| .parse = v06_parse, |
| .open = v06_md_open, |
| .close = generic_md_close, |
| .md_initialize = v06_md_initialize, |
| .md_disk_to_cpu = v06_md_disk_to_cpu, |
| .md_cpu_to_disk = v06_md_cpu_to_disk, |
| .get_gi = m_get_gc, |
| .show_gi = m_show_gc, |
| .set_gi = m_set_gc, |
| .outdate_gi = m_outdate_gc, |
| .invalidate_gi = m_invalidate_gc, |
| }, |
| [DRBD_V07] = { |
| .name = "v07", |
| .args = (char *[]){"device", "index", NULL}, |
| .parse = v07_parse, |
| .open = v07_style_md_open, |
| .close = generic_md_close, |
| .md_initialize = v07_md_initialize, |
| .md_disk_to_cpu = v07_md_disk_to_cpu, |
| .md_cpu_to_disk = v07_md_cpu_to_disk, |
| .get_gi = m_get_gc, |
| .show_gi = m_show_gc, |
| .set_gi = m_set_gc, |
| .outdate_gi = m_outdate_gc, |
| .invalidate_gi = m_invalidate_gc, |
| }, |
| [DRBD_V08] = { |
| .name = "v08", |
| .args = (char *[]){"device", "index", NULL}, |
| .parse = v07_parse, |
| .open = v08_md_open, |
| .close = v08_md_close, |
| .md_initialize = v08_md_initialize, |
| .md_disk_to_cpu = v08_md_disk_to_cpu, |
| .md_cpu_to_disk = v08_md_cpu_to_disk, |
| .get_gi = m_get_uuid, |
| .show_gi = m_show_uuid, |
| .set_gi = m_set_uuid, |
| .outdate_gi = m_outdate_uuid, |
| .invalidate_gi = m_invalidate_uuid, |
| }, |
| [DRBD_V09] = { |
| .name = "v09", |
| .args = (char *[]){"device", "index", NULL}, |
| .parse = v07_parse, |
| .open = v08_md_open, |
| .close = v08_md_close, |
| .md_initialize = v09_md_initialize, |
| .md_disk_to_cpu = v09_md_disk_to_cpu, |
| .md_cpu_to_disk = v09_md_cpu_to_disk, |
| .get_gi = m_get_v9_uuid, |
| .show_gi = m_show_v9_uuid, |
| .set_gi = m_set_v9_uuid, |
| .outdate_gi = m_outdate_uuid, |
| .invalidate_gi = m_invalidate_v9_uuid, |
| }, |
| }; |
| |
| static inline enum md_format format_version(struct format *cfg) |
| { |
| return (cfg->ops - f_ops); |
| } |
| static inline int is_v06(struct format *cfg) |
| { |
| return format_version(cfg) == DRBD_V06; |
| } |
| static inline int is_v07(struct format *cfg) |
| { |
| return format_version(cfg) == DRBD_V07; |
| } |
| static inline int is_v08(struct format *cfg) |
| { |
| return format_version(cfg) == DRBD_V08; |
| } |
| static inline int is_v09(struct format *cfg) |
| { |
| return format_version(cfg) == DRBD_V09; |
| } |
| |
| /****************************************** |
| Commands we know about: |
| ******************************************/ |
| |
/*
 * One entry of the command table cmds[].
 *
 * name:             command name
 * args:             argument synopsis, or NULL/0 if there is none
 * function:         handler implementing the command
 * show_in_usage,
 * node_id_required,
 * modifies_md:      boolean properties of the command
 *
 * The flags are declared "unsigned int": whether a plain "int" bit-field
 * is signed is implementation-defined, and where it is signed (e.g. GCC)
 * a one bit wide field can only hold 0 and -1, so storing 1 would read
 * back as -1 and any comparison against 1 would fail.
 */
struct meta_cmd {
	const char *name;
	const char *args;
	int (*function) (struct format *, char **argv, int argc);
	unsigned int show_in_usage:1;
	unsigned int node_id_required:1;
	unsigned int modifies_md:1;
};
/* Global command pointer, to be able to change behavior in helper functions
 * based on which top-level command is being processed. */
static struct meta_cmd *command;
| |
/*
 * Forward declarations of the command handlers referenced by cmds[].
 * All of them share the signature of meta_cmd.function.
 */

/* generation identifiers */
int meta_get_gi(struct format *cfg, char **argv, int argc);
int meta_show_gi(struct format *cfg, char **argv, int argc);
int meta_set_gi(struct format *cfg, char **argv, int argc);

/* dump / restore / verify */
int meta_dump_md(struct format *cfg, char **argv, int argc);
int meta_restore_md(struct format *cfg, char **argv, int argc);
int meta_verify_dump_file(struct format *cfg, char **argv, int argc);

/* create / wipe / activity log */
int meta_create_md(struct format *cfg, char **argv, int argc);
int meta_wipe_md(struct format *cfg, char **argv, int argc);
int meta_apply_al(struct format *cfg, char **argv, int argc);

/* state changes and queries */
int meta_outdate(struct format *cfg, char **argv, int argc);
int meta_invalidate(struct format *cfg, char **argv, int argc);
int meta_dstate(struct format *cfg, char **argv, int argc);
int meta_chk_offline_resize(struct format *cfg, char **argv, int argc);
int meta_forget_peer(struct format *cfg, char **argv, int argc);

/* device uuid */
int meta_read_dev_uuid(struct format *cfg, char **argv, int argc);
int meta_write_dev_uuid(struct format *cfg, char **argv, int argc);
| |
| struct meta_cmd cmds[] = { |
| {"get-gi", 0, meta_get_gi, 1, 1, 0}, |
| {"show-gi", 0, meta_show_gi, 1, 1, 0}, |
| {"dump-md", 0, meta_dump_md, 1, 0, 0}, |
| {"restore-md", "file", meta_restore_md, 1, 0, 1}, |
| {"verify-dump", "file", meta_verify_dump_file, 1, 0, 0}, |
| {"apply-al", 0, meta_apply_al, 1, 0, 1}, |
| {"wipe-md", 0, meta_wipe_md, 1, 0, 1}, |
| {"outdate", 0, meta_outdate, 1, 0, 1}, |
| {"invalidate", 0, meta_invalidate, 1, 0, 1}, |
| {"dstate", 0, meta_dstate, 1, 0, 0}, |
| {"read-dev-uuid", 0, meta_read_dev_uuid, 0, 0, 0}, |
| {"write-dev-uuid", "VAL", meta_write_dev_uuid, 0, 0, 1}, |
| /* FIXME: Get and set node and peer ids */ |
| {"set-gi", ":::VAL:VAL:...", meta_set_gi, 0, 1, 1}, |
| {"check-resize", 0, meta_chk_offline_resize, 1, 0, 1}, |
| {"create-md", |
| "[--peer-max-bio-size {val}] " |
| "[--al-stripes {val}] " |
| "[--al-stripe-size-kB {val}] " |
| "{max_peers}", |
| meta_create_md, 1, 0, 1}, |
| {"forget-peer", 0, meta_forget_peer, 1, 1, 1}, |
| }; |
| |
| /* |
| * generic helpers |
| */ |
| |
| #define PREAD(cfg,b,c,d) pread_or_die((cfg),(b),(c),(d), __func__ ) |
| #define PWRITE(cfg,b,c,d) pwrite_or_die((cfg),(b),(c),(d), __func__ ) |
| /* Do we want to exit() right here, |
| * or do we want to duplicate the error handling everywhere? */ |
| void pread_or_die(struct format *cfg, void *buf, size_t count, off_t offset, const char* tag) |
| { |
| int fd = cfg->md_fd; |
| ssize_t c = pread(fd, buf, count, offset); |
| if (verbose >= 2) { |
| fflush(stdout); |
| fprintf(stderr, " %-26s: pread(%u, ...,%6lu,%12llu)\n", tag, |
| fd, (unsigned long)count, (unsigned long long)offset); |
| if (count & ((1<<12)-1)) |
| fprintf(stderr, "\tcount will cause EINVAL on hard sect size != 512\n"); |
| if (offset & ((1<<12)-1)) |
| fprintf(stderr, "\toffset will cause EINVAL on hard sect size != 512\n"); |
| } |
| if (c < 0) { |
| fprintf(stderr,"pread(%u,...,%lu,%llu) in %s failed: %s\n", |
| fd, (unsigned long)count, (unsigned long long)offset, |
| tag, strerror(errno)); |
| exit(10); |
| } else if ((size_t)c != count) { |
| fprintf(stderr,"confused in %s: expected to read %d bytes," |
| " actually read %d\n", |
| tag, (int)count, (int)c); |
| exit(10); |
| } |
| if (verbose > 10) |
| fprintf_hex(stderr, offset, buf, count); |
| } |
| |
/*
 * Smaller of two, respectively three, values.
 * These are plain function-like macros: an argument may be evaluated more
 * than once, so do not pass expressions with side effects.
 */
#define min(x,y) ((x) < (y) ? (x) : (y))
#define min3(x,y,z) (min(min((x), (y)), (z)))
| |
| void validate_offsets_or_die(struct format *cfg, size_t count, off_t offset, const char* tag) |
| { |
| off_t al_offset = cfg->md_offset + cfg->md.al_offset * 512LL; |
| off_t bm_offset = cfg->md_offset + cfg->md.bm_offset * 512LL; |
| off_t min_offset; |
| off_t max_offset; |
| |
| if (al_offset != cfg->al_offset) |
| fprintf(stderr, "%s: ambiguous al_offset: "U64" vs %llu\n", |
| tag, cfg->al_offset, (unsigned long long)al_offset); |
| if (bm_offset != cfg->bm_offset) |
| fprintf(stderr, "%s: ambiguous bm_offset: "U64" vs %llu\n", |
| tag, cfg->bm_offset, (unsigned long long)bm_offset); |
| min_offset = min3(cfg->md_offset, al_offset, bm_offset); |
| max_offset = min_offset + cfg->md.md_size_sect * 512LL; |
| if (min_offset < 0) |
| fprintf(stderr, "%s: negative minimum offset: %lld\n", tag, (long long)min_offset); |
| |
| /* If we wipe some old meta data block, |
| * that hopefully falls outside the range of the current meta data. |
| * Skip the range check below. */ |
| if (offset != 0 |
| && (offset == cfg->wipe_fixed |
| ||offset == cfg->wipe_flex |
| ||offset == cfg->wipe_resize)) |
| return; |
| |
| if (offset < min_offset || (offset + count) > max_offset) { |
| fprintf(stderr, "%s: offset+count ("U64"+%zu) not in meta data area range ["U64"; "U64"], aborted\n", |
| tag, offset, count, min_offset, max_offset); |
| if (ignore_sanity_checks) { |
| fprintf(stderr, "Ignored due to --ignore-sanity-checks\n"); |
| } else { |
| fprintf(stderr, "If you want to force this, tell me to --ignore-sanity-checks\n"); |
| exit(10); |
| } |
| } |
| } |
| |
| static unsigned n_writes = 0; |
| void pwrite_or_die(struct format *cfg, const void *buf, size_t count, off_t offset, const char* tag) |
| { |
| int fd = cfg->md_fd; |
| ssize_t c; |
| |
| validate_offsets_or_die(cfg, count, offset, tag); |
| |
| ++n_writes; |
| if (dry_run) { |
| fprintf(stderr, " %-26s: pwrite(%u, ...,%6lu,%12llu) SKIPPED DUE TO DRY-RUN\n", |
| tag, fd, (unsigned long)count, (unsigned long long)offset); |
| if (verbose > 10) |
| fprintf_hex(stderr, offset, buf, count); |
| return; |
| } |
| c = pwrite(fd, buf, count, offset); |
| if (verbose >= 2) { |
| fflush(stdout); |
| fprintf(stderr, " %-26s: pwrite(%u, ...,%6lu,%12llu)\n", tag, |
| fd, (unsigned long)count, (unsigned long long)offset); |
| if (count & ((1<<12)-1)) |
| fprintf(stderr, "\tcount will cause EINVAL on hard sect size != 512\n"); |
| if (offset & ((1<<12)-1)) |
| fprintf(stderr, "\toffset will cause EINVAL on hard sect size != 512\n"); |
| } |
| if (c < 0) { |
| fprintf(stderr,"pwrite(%u,...,%lu,%llu) in %s failed: %s\n", |
| fd, (unsigned long)count, (unsigned long long)offset, |
| tag, strerror(errno)); |
| exit(10); |
| } else if ((size_t)c != count) { |
| /* FIXME we might just now have corrupted the on-disk data */ |
| fprintf(stderr,"confused in %s: expected to write %d bytes," |
| " actually wrote %d\n", tag, (int)count, (int)c); |
| exit(10); |
| } |
| } |
| |
/*
 * Like pwrite_or_die(), but never writes at or beyond the byte offset
 * @limit: a request reaching past @limit is truncated (with a note on
 * stderr), a request starting at or after @limit terminates the process
 * with status 10.
 *
 * Returns the number of bytes actually passed to pwrite_or_die().
 */
size_t pwrite_with_limit_or_die(struct format *cfg, const void *buf, size_t count, off_t offset, off_t limit, const char* tag)
{
	if (!(offset < limit)) {
		fprintf(stderr,"confused in %s: offset (%llu) > limit (%llu)\n",
			tag, (unsigned long long)offset, (unsigned long long)limit);
		exit(10);
	}

	/* clamp the request to the room left in front of the limit */
	if (count > limit - offset) {
		fprintf(stderr,"in %s: truncating byte count from %lu to %lu\n", tag,
			(unsigned long)count,
			(unsigned long)(limit -offset));
		count = limit - offset;
	}

	pwrite_or_die(cfg, buf, count, offset, tag);
	return count;
}
| |
| void m_get_gc(struct md_cpu *md, int node_id __attribute((unused))) |
| { |
| dt_print_gc(md->gc); |
| } |
| |
| void m_show_gc(struct md_cpu *md, int node_id __attribute((unused))) |
| { |
| dt_pretty_print_gc(md->gc); |
| } |
| |
| void m_get_uuid(struct md_cpu *md, int node_id) |
| { |
| uint64_t uuids[] = { |
| [UI_CURRENT] = md->current_uuid, |
| [UI_BITMAP] = md->peers[node_id].bitmap_uuid, |
| [UI_HISTORY_START] = md->history_uuids[0], |
| [UI_HISTORY_END] = md->history_uuids[1], |
| }; |
| |
| dt_print_uuids(uuids, md->flags); |
| } |
| |
| void m_show_uuid(struct md_cpu *md, int node_id) |
| { |
| uint64_t uuids[] = { |
| [UI_CURRENT] = md->current_uuid, |
| [UI_BITMAP] = md->peers[node_id].bitmap_uuid, |
| [UI_HISTORY_START] = md->history_uuids[0], |
| [UI_HISTORY_END] = md->history_uuids[1], |
| }; |
| |
| dt_pretty_print_uuids(uuids, md->flags); |
| } |
| |
| void m_get_v9_uuid(struct md_cpu *md, int node_id) |
| { |
| uint64_t uuids[] = { |
| [UI_CURRENT] = md->current_uuid, |
| [UI_BITMAP] = md->peers[node_id].bitmap_uuid, |
| [UI_HISTORY_START] = md->history_uuids[0], |
| [UI_HISTORY_END] = md->history_uuids[1], |
| }; |
| |
| dt_print_v9_uuids(uuids, md->flags, md->peers[node_id].flags); |
| } |
| |
| void m_show_v9_uuid(struct md_cpu *md, int node_id) |
| { |
| uint64_t uuids[] = { |
| [UI_CURRENT] = md->current_uuid, |
| [UI_BITMAP] = md->peers[node_id].bitmap_uuid, |
| [UI_HISTORY_START] = md->history_uuids[0], |
| [UI_HISTORY_END] = md->history_uuids[1], |
| }; |
| |
| dt_pretty_print_v9_uuids(uuids, md->flags, md->peers[node_id].flags); |
| } |
| |
/*
 * Split the next ':' separated field off *@s (using strsep(), so the
 * string is modified and *@s is advanced) and parse it as a 32 bit
 * unsigned number.  The base is auto-detected by strtoul(): decimal,
 * 0x.. hexadecimal, or 0.. octal.
 *
 * An empty field leaves *@val untouched.
 *
 * Returns 1 if there was a field (empty or not), 0 if the input is
 * exhausted.  A field that is not a number, or out of range, is
 * reported on stderr and terminates the process with status 10.
 */
int m_strsep_u32(char **s, uint32_t *val)
{
	char *t, *e;
	unsigned long v;

	t = strsep(s, ":");
	if (!t)
		return 0;
	if (*t == '\0')
		return 1;

	/* Diagnostics have to name the field itself (t): after strsep(),
	 * *s is the remainder of the input, or NULL after the last field. */
	errno = 0;
	v = strtoul(t, &e, 0);
	if (*e != 0) {
		fprintf(stderr, "'%s' is not a number.\n", t);
		exit(10);
	}
	if (errno) {
		/* single fprintf: errno can not be changed by stdio
		 * between printing the field and printing the reason */
		fprintf(stderr, "'%s': %s\n", t, strerror(errno));
		exit(10);
	}
	if (v > 0xFFffFFffUL) {
		fprintf(stderr,
			"'%s' is out of range (max 0xFFffFFff).\n",
			t);
		exit(10);
	}
	*val = (uint32_t)v;
	return 1;
}
| |
/*
 * Split the next ':' separated field off *@s (using strsep(), so the
 * string is modified and *@s is advanced) and parse it as a 64 bit
 * number via strto_u64() with base 16.
 *
 * An empty field leaves *@val untouched.
 *
 * Returns 1 if there was a field (empty or not), 0 if the input is
 * exhausted.  A field that is not a number, or for which strto_u64()
 * sets errno, is reported on stderr and terminates the process with
 * status 10.
 */
int m_strsep_u64(char **s, uint64_t *val)
{
	char *t, *e;
	uint64_t v;

	t = strsep(s, ":");
	if (!t)
		return 0;
	if (*t == '\0')
		return 1;

	/* Diagnostics have to name the field itself (t): after strsep(),
	 * *s is the remainder of the input, or NULL after the last field. */
	e = t;
	errno = 0;
	v = strto_u64(t, &e, 16);
	if (*e != 0) {
		fprintf(stderr, "'%s' is not a number.\n", t);
		exit(10);
	}
	if (errno) {
		/* single fprintf: errno can not be changed by stdio
		 * between printing the field and printing the reason */
		fprintf(stderr, "'%s': %s\n", t, strerror(errno));
		exit(10);
	}
	*val = v;
	return 1;
}
| |
/*
 * Parse the next ':' separated field of *@s as a boolean (0 or 1) and
 * set or clear the bit(s) @mask in *@val accordingly.
 *
 * An empty or missing field keeps the current state of the bit.
 * Any value other than 0 or 1 is reported on stderr and terminates the
 * process with status 10.
 *
 * Returns what m_strsep_u32() returned: 1 if there was a field,
 * 0 if the input is exhausted.
 */
int m_strsep_bit(char **s, uint32_t *val, int mask)
{
	uint32_t d;
	int rv;

	/* start from the current state, so an empty field changes nothing */
	d = *val & mask ? 1 : 0;

	rv = m_strsep_u32(s, &d);

	if (d > 1) {
		/* d is unsigned; %d would show large values as negative */
		fprintf(stderr, "'%u' is not 0 or 1.\n", (unsigned int)d);
		exit(10);
	}

	if (d)
		*val |= mask;
	else
		*val &= ~mask;

	return rv;
}
| |
| void m_set_gc(struct md_cpu *md, int node_id __attribute((unused)), char **argv, int argc __attribute((unused))) |
| { |
| char **str; |
| |
| str = &argv[0]; |
| |
| do { |
| if (!m_strsep_bit(str, &md->gc[Flags], MDF_CONSISTENT)) break; |
| if (!m_strsep_u32(str, &md->gc[HumanCnt])) break; |
| if (!m_strsep_u32(str, &md->gc[TimeoutCnt])) break; |
| if (!m_strsep_u32(str, &md->gc[ConnectedCnt])) break; |
| if (!m_strsep_u32(str, &md->gc[ArbitraryCnt])) break; |
| if (!m_strsep_bit(str, &md->gc[Flags], MDF_PRIMARY_IND)) break; |
| if (!m_strsep_bit(str, &md->gc[Flags], MDF_CONNECTED_IND)) break; |
| if (!m_strsep_bit(str, &md->gc[Flags], MDF_FULL_SYNC)) break; |
| } while (0); |
| } |
| |
| void m_set_uuid(struct md_cpu *md, int node_id, char **argv, int argc __attribute((unused))) |
| { |
| char **str; |
| int i; |
| |
| str = &argv[0]; |
| |
| do { |
| if (!m_strsep_u64(str, &md->current_uuid)) break; |
| if (!m_strsep_u64(str, &md->peers[node_id].bitmap_uuid)) break; |
| for (i = 0; i < HISTORY_UUIDS_V08; i++) |
| if (!m_strsep_u64(str, &md->history_uuids[i])) return; |
| if (!m_strsep_bit(str, &md->flags, MDF_CONSISTENT)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_WAS_UP_TO_DATE)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_PRIMARY_IND)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_CONNECTED_IND)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_FULL_SYNC)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_PEER_OUT_DATED)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_CRASHED_PRIMARY)) break; |
| } while (0); |
| } |
| |
| void m_set_v9_uuid(struct md_cpu *md, int node_id, char **argv, int argc __attribute((unused))) |
| { |
| char **str; |
| int i; |
| |
| str = &argv[0]; |
| |
| do { |
| if (!m_strsep_u64(str, &md->current_uuid)) break; |
| if (!m_strsep_u64(str, &md->peers[node_id].bitmap_uuid)) break; |
| for (i = 0; i < HISTORY_UUIDS_V08; i++) |
| if (!m_strsep_u64(str, &md->history_uuids[i])) return; |
| if (!m_strsep_bit(str, &md->flags, MDF_CONSISTENT)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_WAS_UP_TO_DATE)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_PRIMARY_IND)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_CRASHED_PRIMARY)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_AL_CLEAN)) break; |
| if (!m_strsep_bit(str, &md->flags, MDF_AL_DISABLED)) break; |
| if (!m_strsep_bit(str, &md->peers[node_id].flags, MDF_PEER_CONNECTED)) break; |
| if (!m_strsep_bit(str, &md->peers[node_id].flags, MDF_PEER_OUTDATED)) break; |
| if (!m_strsep_bit(str, &md->peers[node_id].flags, MDF_PEER_FENCING)) break; |
| if (!m_strsep_bit(str, &md->peers[node_id].flags, MDF_PEER_FULL_SYNC)) break; |
| if (!m_strsep_bit(str, &md->peers[node_id].flags, MDF_PEER_DEVICE_SEEN)) break; |
| } while (0); |
| } |
| |
/* ->outdate_gi() of the v06 and v07 formats: not supported.
 * Prints a message on stderr and returns 5; @md is left untouched. */
int m_outdate_gc(struct md_cpu *md)
{
	(void)md;

	fputs("Can not outdate GC based meta data!\n", stderr);
	return 5;
}
| |
| int m_outdate_uuid(struct md_cpu *md) |
| { |
| if ( !(md->flags & MDF_CONSISTENT) ) { |
| return 5; |
| } |
| |
| md->flags &= ~MDF_WAS_UP_TO_DATE; |
| |
| return 0; |
| } |
| |
| int m_invalidate_gc(struct md_cpu *md) |
| { |
| md->gc[Flags] &= ~MDF_CONSISTENT; |
| md->gc[Flags] |= MDF_FULL_SYNC; |
| |
| return 5; |
| } |
| |
| int m_invalidate_uuid(struct md_cpu *md) |
| { |
| md->flags &= ~MDF_CONSISTENT; |
| md->flags &= ~MDF_WAS_UP_TO_DATE; |
| md->flags |= MDF_FULL_SYNC; |
| |
| return 0; |
| } |
| |
| int m_invalidate_v9_uuid(struct md_cpu *md) |
| { |
| int node_id; |
| |
| md->flags &= ~MDF_CONSISTENT; |
| md->flags &= ~MDF_WAS_UP_TO_DATE; |
| |
| for (node_id = 0; node_id < DRBD_NODE_ID_MAX; node_id++) { |
| md->peers[node_id].flags |= MDF_PEER_FULL_SYNC; |
| } |
| |
| return 0; |
| } |
| |
| |
| /****************************************** |
| begin of v06 {{{ |
| ******************************************/ |
| |
| int v06_md_disk_to_cpu(struct format *cfg) |
| { |
| PREAD(cfg, on_disk_buffer, sizeof(struct md_on_disk_06), cfg->md_offset); |
| md_disk_06_to_cpu(&cfg->md, (struct md_on_disk_06*)on_disk_buffer); |
| return v06_validate_md(cfg); |
| } |
| |
| int v06_md_cpu_to_disk(struct format *cfg) |
| { |
| if (v06_validate_md(cfg)) |
| return -1; |
| md_cpu_to_disk_06(on_disk_buffer, &cfg->md); |
| PWRITE(cfg, on_disk_buffer, sizeof(struct md_on_disk_06), cfg->md_offset); |
| return 0; |
| } |
| |
| int v06_parse(struct format *cfg, char **argv, int argc, int *ai) |
| { |
| unsigned long minor; |
| char *e; |
| |
| if (argc < 1) { |
| fprintf(stderr, "Too few arguments for format\n"); |
| exit(20); |
| } |
| |
| e = argv[0]; |
| minor = strtol(argv[0], &e, 0); |
| if (*e != 0 || minor > 255UL) { |
| fprintf(stderr, "'%s' is not a valid minor number.\n", argv[0]); |
| exit(20); |
| } |
| if (asprintf(&e, "%s/drbd%lu", DRBD_LIB_DIR, minor) <= 18) { |
| fprintf(stderr, "asprintf() failed.\n"); |
| exit(20); |
| }; |
| cfg->md_device_name = e; |
| |
| *ai += 1; |
| |
| return 0; |
| } |
| |
| int v06_md_open(struct format *cfg) |
| { |
| struct stat sb; |
| |
| cfg->md_fd = open(cfg->md_device_name, O_RDWR); |
| |
| if (cfg->md_fd == -1) { |
| PERROR("open(%s) failed", cfg->md_device_name); |
| return NO_VALID_MD_FOUND; |
| } |
| |
| if (fstat(cfg->md_fd, &sb)) { |
| PERROR("fstat() failed"); |
| return NO_VALID_MD_FOUND; |
| } |
| |
| if (!S_ISREG(sb.st_mode)) { |
| fprintf(stderr, "'%s' is not a plain file!\n", |
| cfg->md_device_name); |
| return NO_VALID_MD_FOUND; |
| } |
| |
| if (cfg->ops->md_disk_to_cpu(cfg)) { |
| return NO_VALID_MD_FOUND; |
| } |
| |
| return VALID_MD_FOUND; |
| } |
| |
| int generic_md_close(struct format *cfg) |
| { |
| /* On /dev/ram0 we may not use O_SYNC for some kernels (eg. RHEL6 2.6.32), |
| * and fsync() returns EIO, too. So we don't do error checking here. */ |
| fsync(cfg->md_fd); |
| if (close(cfg->md_fd)) { |
| PERROR("close() failed"); |
| return -1; |
| } |
| return 0; |
| } |
| |
| int v06_md_initialize(struct format *cfg, |
| int do_disk_writes __attribute((unused)), |
| int max_peers __attribute((unused))) |
| { |
| cfg->md.gc[Flags] = 0; |
| cfg->md.gc[HumanCnt] = 1; /* THINK 0? 1? */ |
| cfg->md.gc[TimeoutCnt] = 1; |
| cfg->md.gc[ConnectedCnt] = 1; |
| cfg->md.gc[ArbitraryCnt] = 1; |
| cfg->md.max_peers = 1; |
| cfg->md.magic = DRBD_MD_MAGIC_06; |
| return 0; |
| } |
| |
| /****************************************** |
| }}} end of v06 |
| ******************************************/ |
| |
| static uint64_t max_usable_sectors(struct format *cfg) |
| { |
| /* We currently have two possible layouts: |
| * external: |
| * |----------- md_size_sect ------------------| |
| * [ 4k superblock ][ activity log ][ Bitmap ] |
| * | al_offset == 8 | |
| * | bm_offset = al_offset + X | |
| * ==> bitmap sectors = md_size_sect - bm_offset |
| * |
| * internal: |
| * |----------- md_size_sect ------------------| |
| * [data.....][ Bitmap ][ activity log ][ 4k superblock ] |
| * | al_offset < 0 | |
| * | bm_offset = al_offset - Y | |
| * ==> bitmap sectors = Y = al_offset - bm_offset |
| * |
| * There also used to be the fixed size internal meta data, |
| * which covers the last 128 MB of the device, |
| * and has the same layout as the "external:" above. |
| */ |
| if(cfg->md_index == DRBD_MD_INDEX_INTERNAL || |
| cfg->md_index == DRBD_MD_INDEX_FLEX_INT) { |
| /* for internal meta data, the available storage is limitted by |
| * the first meta data sector, even if the available bitmap |
| * space would support more. */ |
| return min3(cfg->md_offset, |
| cfg->al_offset, |
| cfg->bm_offset ) >> 9; |
| } else { |
| /* For external meta data, |
| * we are limited by available on-disk bitmap space. |
| * Ok, and by the lower level storage device; |
| * which we don't know about here :-( */ |
| ASSERT(cfg->md.bm_bytes_per_bit == 4096); |
| |
| return |
| /* bitmap sectors */ |
| (uint64_t)(cfg->md.md_size_sect - cfg->md.bm_offset) |
| * 512 /* sector size */ |
| * 8 /* bits per byte */ |
| / 64 /* 64 bit words, for interoperability */ |
| / cfg->md.max_peers |
| * 64 /* back to bits, per bitmap slot */ |
| /* storage bytes per bitmap bit; |
| * currently always 4096 */ |
| * cfg->md.bm_bytes_per_bit |
| / 512; /* and back to sectors */; |
| } |
| #undef min |
| } |
| |
| void re_initialize_md_offsets(struct format *cfg) |
| { |
| uint64_t md_size_sect; |
| int al_size_sect; |
| |
| /* These two are needed for bm_bytes()... Ensure sane defaults... */ |
| if (cfg->md.bm_bytes_per_bit == 0) |
| cfg->md.bm_bytes_per_bit = DEFAULT_BM_BLOCK_SIZE; |
| if (cfg->md.max_peers == 0) |
| cfg->md.max_peers = 1; |
| |
| al_size_sect = cfg->md.al_stripes * cfg->md.al_stripe_size_4k * 8; |
| switch(cfg->md_index) { |
| default: |
| cfg->md.md_size_sect = MD_RESERVED_SECT_07; |
| cfg->md.al_offset = MD_AL_OFFSET_07; |
| cfg->md.bm_offset = cfg->md.al_offset + al_size_sect; |
| break; |
| case DRBD_MD_INDEX_FLEX_EXT: |
| /* just occupy the full device; unit: sectors */ |
| cfg->md.md_size_sect = cfg->bd_size >> 9; |
| cfg->md.al_offset = MD_AL_OFFSET_07; |
| cfg->md.bm_offset = cfg->md.al_offset + al_size_sect; |
| break; |
| case DRBD_MD_INDEX_INTERNAL: /* only v07 */ |
| cfg->md.md_size_sect = MD_RESERVED_SECT_07; |
| cfg->md.al_offset = MD_AL_OFFSET_07; |
| cfg->md.bm_offset = MD_BM_OFFSET_07; |
| break; |
| case DRBD_MD_INDEX_FLEX_INT: |
| /* al size is still fixed */ |
| cfg->md.al_offset = -al_size_sect; |
| |
| /* we need (slightly less than) ~ this much bitmap sectors: */ |
| md_size_sect = bm_bytes(&cfg->md, cfg->bd_size >> 9) >> 9; |
| md_size_sect = ALIGN(md_size_sect, 8); /* align on 4K blocks */ |
| if (md_size_sect > (MD_BM_MAX_BYTE_FLEX>>9)) { |
| fprintf(stderr, "Bitmap for that device got too large.\n"); |
| if (BITS_PER_LONG == 32) |
| fprintf(stderr, "Maybe try a 64bit arch?\n"); |
| exit(10); |
| } |
| /* plus the "drbd meta data super block", |
| * and the activity log; unit still sectors */ |
| md_size_sect += MD_AL_OFFSET_07 + al_size_sect; |
| cfg->md.md_size_sect = md_size_sect; |
| cfg->md.bm_offset = -md_size_sect + MD_AL_OFFSET_07; |
| break; |
| } |
| cfg->al_offset = cfg->md_offset + cfg->md.al_offset * 512LL; |
| cfg->bm_offset = cfg->md_offset + cfg->md.bm_offset * 512LL; |
| cfg->max_usable_sect = max_usable_sectors(cfg); |
| |
| if (verbose >= 2) { |
| fprintf(stderr,"md_offset: "U64"\n", cfg->md_offset); |
| fprintf(stderr,"al_offset: "U64" (%d)\n", cfg->al_offset, cfg->md.al_offset); |
| fprintf(stderr,"bm_offset: "U64" (%d)\n", cfg->bm_offset, cfg->md.bm_offset); |
| fprintf(stderr,"md_size_sect: "U32"\n", cfg->md.md_size_sect); |
| fprintf(stderr,"max_usable_sect: "U64"\n", cfg->max_usable_sect); |
| } |
| } |
| |
| void initialize_al(struct format *cfg) |
| { |
| unsigned int mx = cfg->md.al_stripes * cfg->md.al_stripe_size_4k; |
| size_t al_size = mx * 4096; |
| memset(on_disk_buffer, 0x00, al_size); |
| if (format_version(cfg) >= DRBD_V08) { |
| /* DRBD <= 8.3 does not care if it is all zero, |
| * or otherwise wrong magic. |
| * |
| * For 8.4 and 9.0, we initialize to something that is |
| * valid magic, valid crc, and transaction_type = 0xffff. |
| */ |
| struct al_4k_transaction_on_disk *al = on_disk_buffer; |
| unsigned crc_be = 0; |
| int i; |
| for (i = 0; i < mx; i++, al++) { |
| al->magic.be = cpu_to_be32(DRBD_AL_MAGIC); |
| al->transaction_type.be = cpu_to_be16(AL_TR_INITIALIZED); |
| /* crc calculated once */ |
| if (i == 0) |
| crc_be = cpu_to_be32(crc32c(0, (void*)al, 4096)); |
| al->crc32c.be = crc_be; |
| } |
| } |
| pwrite_or_die(cfg, on_disk_buffer, al_size, cfg->al_offset, |
| "md_initialize_common:AL"); |
| } |
| |
| void check_for_existing_data(struct format *cfg); |
| |
| static void zeroout_bitmap(struct format *cfg) |
| { |
| const size_t bitmap_bytes = |
| ALIGN(bm_bytes(&cfg->md, cfg->bd_size >> 9), cfg->md_hard_sect_size); |
| uint64_t range[2]; |
| int err; |
| |
| range[0] = cfg->bm_offset; /* start offset */ |
| range[1] = bitmap_bytes; /* len */ |
| |
| fprintf(stderr,"initializing bitmap (%u KB) to all zero\n", |
| (unsigned int)(bitmap_bytes>>10)); |
| |
| err = ioctl(cfg->md_fd, BLKZEROOUT, &range); |
| if (!err) |
| return; |
| |
| PERROR("ioctl(%s, BLKZEROOUT, [%llu, %llu]) failed", cfg->md_device_name, |
| (unsigned long long)range[0], (unsigned long long)range[1]); |
| fprintf(stderr, "Using slow(er) fallback.\n"); |
| |
| { |
| /* need to sector-align this for O_DIRECT. |
| * "sector" here means hard-sect size, which may be != 512. |
| * Note that even though ALIGN does round up, for sector sizes |
| * of 512, 1024, 2048, 4096 Bytes, this will be fully within |
| * the claimed meta data area, since we already align all |
| * "interesting" parts of that to 4kB */ |
| size_t i = bitmap_bytes; |
| off_t bm_on_disk_off = cfg->bm_offset; |
| unsigned int percent_done = 0; |
| unsigned int percent_last_report = 0; |
| size_t chunk; |
| |
| memset(on_disk_buffer, 0x00, buffer_size); |
| while (i) { |
| chunk = buffer_size < i ? buffer_size : i; |
| pwrite_or_die(cfg, on_disk_buffer, |
| chunk, bm_on_disk_off, |
| "md_initialize_common:BM"); |
| bm_on_disk_off += chunk; |
| i -= chunk; |
| percent_done = 100*(bitmap_bytes-i)/bitmap_bytes; |
| if (percent_done != percent_last_report) { |
| fprintf(stderr,"\r%u%%", percent_done); |
| percent_last_report = percent_done; |
| } |
| } |
| fprintf(stderr,"\r100%%\n"); |
| } |
| } |
| |
| /* MAYBE DOES DISK WRITES!! */ |
| int md_initialize_common(struct format *cfg, int do_disk_writes) |
| { |
| cfg->md.al_nr_extents = 257; /* arbitrary. */ |
| cfg->md.bm_bytes_per_bit = DEFAULT_BM_BLOCK_SIZE; |
| |
| re_initialize_md_offsets(cfg); |
| |
| if (!do_disk_writes) |
| return 0; |
| |
| check_for_existing_data(cfg); |
| |
| /* do you want to initialize al to something more useful? */ |
| printf("initializing activity log\n"); |
| if (MD_AL_MAX_SECT_07*512 > buffer_size) { |
| fprintf(stderr, "%s:%u: LOGIC BUG\n" , __FILE__ , __LINE__ ); |
| exit(111); |
| } |
| initialize_al(cfg); |
| |
| /* We initialize the bitmap to all 0 for the use case that someone |
| * might use set-gi to pretend that the backend devices are completely |
| * in sync. (I.e. thinly provisioned storage, all zeroes) |
| * |
| * In case it current UUID is left at UUID_JUST_CREATED the kernel |
| * driver will set all bits to 1 when using it in a handshake... |
| */ |
| zeroout_bitmap(cfg); |
| |
| return 0; |
| } |
| |
| /****************************************** |
| begin of v07 {{{ |
| ******************************************/ |
| |
| uint64_t v07_style_md_get_byte_offset(const int idx, const uint64_t bd_size) |
| { |
| uint64_t offset; |
| |
| switch(idx) { |
| default: /* external, some index */ |
| offset = MD_RESERVED_SECT_07 * idx * 512; |
| break; |
| case DRBD_MD_INDEX_INTERNAL: |
| offset = (bd_size & ~4095LLU) |
| - MD_RESERVED_SECT_07 * 512; |
| break; |
| case DRBD_MD_INDEX_FLEX_INT: |
| /* sizeof(struct md_on_disk_07) == 4k |
| * position: last 4k aligned block of 4k size */ |
| offset = bd_size - 4096LLU; |
| offset &= ~4095LLU; |
| break; |
| case DRBD_MD_INDEX_FLEX_EXT: |
| offset = 0; |
| break; |
| } |
| return offset; |
| } |
| |
| void printf_al_07(struct format *cfg, struct al_sector_on_disk *al_disk) |
| { |
| struct al_sector_cpu al_cpu; |
| unsigned s, i; |
| unsigned max_slot_nr = 0; |
| for (s = 0; s < MD_AL_MAX_SECT_07; s++) { |
| int ok = v07_al_disk_to_cpu(&al_cpu, al_disk + s); |
| printf("# sector %2u { %s\n", s, ok ? "valid" : "invalid"); |
| printf("# \tmagic: 0x%08x\n", al_cpu.magic); |
| printf("# \ttr: %10u\n", al_cpu.tr_number); |
| for (i = 0; i < 62; i++) { |
| printf("# \t%2u: %10u %10u\n", i, |
| al_cpu.updates[i].pos, |
| al_cpu.updates[i].extent); |
| if (al_cpu.updates[i].pos > max_slot_nr && |
| al_cpu.updates[i].pos != -1U) |
| max_slot_nr = al_cpu.updates[i].pos; |
| } |
| printf("# \txor: 0x%08x\n", al_cpu.xor_sum); |
| printf("# }\n"); |
| } |
| if (max_slot_nr >= cfg->md.al_nr_extents) |
| printf( |
| "### CAUTION: maximum slot number found in AL: %u\n" |
| "### CAUTION: but 'super-block' al-extents is: %u\n", |
| max_slot_nr, cfg->md.al_nr_extents); |
| } |
| |
| void printf_al_84(struct format *cfg, struct al_4k_transaction_on_disk *al_disk, |
| unsigned block_nr_offset, unsigned N) |
| { |
| struct al_4k_cpu al_cpu; |
| unsigned b, i; |
| unsigned max_slot_nr = 0; |
| for (b = 0; b < N; b++) { |
| int ok = v84_al_disk_to_cpu(&al_cpu, al_disk + b); |
| if (!ok) { |
| printf("# block %2u { INVALID }\n", b + block_nr_offset); |
| continue; |
| } |
| if (al_cpu.transaction_type == 0xffff) { |
| printf("# block %2u { INITIALIZED }\n", b + block_nr_offset); |
| continue; |
| } |
| printf("# block %2u {\n", b + block_nr_offset); |
| printf("# \tmagic: 0x%08x\n", al_cpu.magic); |
| printf("# \ttype: 0x%04x\n", al_cpu.transaction_type); |
| printf("# \ttr: %10u\n", al_cpu.tr_number); |
| printf("# \tactive set size: %u\n", al_cpu.context_size); |
| if (al_cpu.context_size -1 > max_slot_nr) |
| max_slot_nr = al_cpu.context_size -1; |
| for (i = 0; i < AL_CONTEXT_PER_TRANSACTION; i++) { |
| unsigned slot = al_cpu.context_start_slot_nr + i; |
| if (al_cpu.context[i] == ~0U && slot >= al_cpu.context_size) |
| continue; |
| if (slot > max_slot_nr) |
| max_slot_nr = slot; |
| printf("# \t%2u: %10u %10u\n", i, slot, al_cpu.context[i]); |
| } |
| printf("# \tupdates: %u\n", al_cpu.n_updates); |
| for (i = 0; i < AL_UPDATES_PER_TRANSACTION; i++) { |
| if (i >= al_cpu.n_updates && |
| al_cpu.update_slot_nr[i] == (uint16_t)(~0U)) |
| continue; |
| printf("# \t%2u: %10u %10u\n", i, |
| al_cpu.update_slot_nr[i], |
| al_cpu.update_extent_nr[i]); |
| if (al_cpu.update_slot_nr[i] > max_slot_nr) |
| max_slot_nr = al_cpu.update_slot_nr[i]; |
| } |
| printf("# \tcrc32c: 0x%08x\n", al_cpu.crc32c); |
| printf("# }\n"); |
| } |
| if (max_slot_nr >= cfg->md.al_nr_extents) |
| printf( |
| "### CAUTION: maximum slot number found in AL: %u\n" |
| "### CAUTION: but 'super-block' al-extents is: %u\n", |
| max_slot_nr, cfg->md.al_nr_extents); |
| } |
| |
| void printf_al(struct format *cfg) |
| { |
| off_t al_on_disk_off = cfg->al_offset; |
| off_t al_size = cfg->md.al_stripes * cfg->md.al_stripe_size_4k * 4096; |
| struct al_sector_on_disk *al_512_disk = on_disk_buffer; |
| struct al_4k_transaction_on_disk *al_4k_disk = on_disk_buffer; |
| unsigned block_nr_offset = 0; |
| unsigned N; |
| |
| int is_al_84 = is_v09(cfg) || |
| (is_v08(cfg) && |
| (cfg->md.al_stripes != 1 || cfg->md.al_stripe_size_4k != 8)); |
| |
| printf("# al {\n"); |
| |
| while (al_size) { |
| off_t chunk = al_size; |
| if (chunk > buffer_size) |
| chunk = buffer_size; |
| ASSERT(chunk); |
| pread_or_die(cfg, on_disk_buffer, chunk, al_on_disk_off, "printf_al"); |
| al_on_disk_off += chunk; |
| al_size -= chunk; |
| N = chunk/4096; |
| |
| /* FIXME |
| * we should introduce a new meta data "super block" magic, so we won't |
| * have the same super block with two different activity log |
| * transaction layouts */ |
| if (format_version(cfg) < DRBD_V08) |
| printf_al_07(cfg, al_512_disk); |
| |
| /* looks like we have the new al format */ |
| else if (is_al_84 || |
| DRBD_AL_MAGIC == be32_to_cpu(al_4k_disk[0].magic.be) || |
| DRBD_AL_MAGIC == be32_to_cpu(al_4k_disk[1].magic.be)) { |
| is_al_84 = 1; |
| printf_al_84(cfg, al_4k_disk, block_nr_offset, N); |
| } |
| |
| /* try the old al format anyways */ |
| else |
| printf_al_07(cfg, al_512_disk); |
| |
| block_nr_offset += N; |
| if (al_size && !is_al_84) { |
| printf("### UNEXPECTED ACTIVITY LOG SIZE!\n"); |
| } |
| } |
| printf("# }\n"); |
| } |
| |
/* One activity log extent represents 4M of storage,
 * one bit corresponds to 4k.
 * 4M / 4k / 8bit per byte
 * => 1 << 7 == 128 bytes of bitmap per activity log extent (and peer). */
#define BM_BYTES_PER_AL_EXT (1UL << (22 - 12 - 3))

/* Remembers one transaction while scanning the on-disk activity log;
 * the replay functions below keep one cursor for the oldest and one
 * for the newest transaction they have seen. */
struct al_cursor {
	unsigned i;		/* index of that transaction within the scanned log */
	uint32_t tr_number;	/* transaction number stored in that transaction */
};
| |
| static int replay_al_07(struct format *cfg, uint32_t *hot_extent) |
| { |
| unsigned int mx; |
| struct al_sector_cpu al_cpu[MD_AL_MAX_SECT_07]; |
| unsigned char valid[MD_AL_MAX_SECT_07]; |
| |
| struct al_sector_on_disk *al_disk = on_disk_buffer; |
| |
| unsigned b, i; |
| |
| int found_valid = 0; |
| struct al_cursor oldest = { 0, }; |
| struct al_cursor newest = { 0, }; |
| |
| /* Endian convert, validate, and find oldest to newest log range. |
| * In contrast to the 8.4 log format, this log format does NOT |
| * use all log space, but only as many sectors as absolutely necessary. |
| * |
| * We need to trust the "al_nr_extents" setting in the "super block". |
| */ |
| #define AL_EXTENTS_PT 61 |
| /* mx = 1 + div_ceil(al_nr_extents, AL_EXTENTS_PT); */ |
| mx = 1 + (cfg->md.al_nr_extents + AL_EXTENTS_PT -1) / AL_EXTENTS_PT; |
| for (b = 0; b < mx; b++) { |
| valid[b] = v07_al_disk_to_cpu(al_cpu + b, al_disk + b); |
| if (!valid[b]) |
| continue; |
| if (++found_valid == 1) { |
| oldest.i = b; |
| oldest.tr_number = al_cpu[b].tr_number; |
| newest = oldest; |
| continue; |
| } |
| |
| d_expect(al_cpu[b].tr_number != oldest.tr_number); |
| d_expect(al_cpu[b].tr_number != newest.tr_number); |
| if ((int)al_cpu[b].tr_number - (int)oldest.tr_number < 0) { |
| d_expect(oldest.tr_number - al_cpu[b].tr_number + b - oldest.i == mx); |
| oldest.i = b; |
| oldest.tr_number = al_cpu[b].tr_number; |
| } |
| if ((int)al_cpu[b].tr_number - (int)newest.tr_number > 0) { |
| d_expect(al_cpu[b].tr_number - newest.tr_number == b - newest.i); |
| newest.i = b; |
| newest.tr_number = al_cpu[b].tr_number; |
| } |
| } |
| |
| if (!found_valid) { |
| /* not even one transaction was valid. |
| * Has this ever been initialized correctly? */ |
| fprintf(stderr, "No usable activity log found.\n"); |
| /* with up to 8.3 style activity log, this is NOT an error. */ |
| return 0; |
| } |
| |
| /* we do expect at most one corrupt transaction, and only in case |
| * things went wrong during transaction write. */ |
| if (found_valid != mx) { |
| fprintf(stderr, "%u corrupt or uninitialized AL transactions found\n", mx - found_valid); |
| fprintf(stderr, "You can safely ignore this if this node was cleanly stopped (no crash).\n"); |
| } |
| |
| /* Any other paranoia checks possible with this log format? */ |
| |
| /* Ok, so we found valid update transactions. Reconstruct the "active |
| * set" at the time of the newest transaction. */ |
| |
| /* wrap around */ |
| if (newest.i < oldest.i) |
| newest.i += mx; |
| |
| for (b = oldest.i; b <= newest.i; b++) { |
| unsigned idx = b % mx; |
| if (!valid[idx]) |
| continue; |
| |
| /* This loop processes both "context" and "update" information. |
| * There is only one update, on index 0, |
| * which is why this loop counts down. */ |
| for (i = AL_EXTENTS_PT; (int)i >= 0; i--) { |
| unsigned slot = al_cpu[idx].updates[i].pos; |
| if (al_cpu[idx].updates[i].extent == ~0U) |
| continue; |
| if (slot >= AL_EXTENTS_MAX) { |
| fprintf(stderr, "slot number out of range: tr:%u slot:%u\n", |
| idx, slot); |
| continue; |
| } |
| hot_extent[slot] = al_cpu[idx].updates[i].extent; |
| } |
| } |
| return found_valid; |
| } |
| |
| static unsigned int al_tr_number_to_on_disk_slot(struct format *cfg, unsigned int b, unsigned int mx) |
| { |
| const unsigned int stripes = cfg->md.al_stripes; |
| const unsigned int stripe_size_4kB = cfg->md.al_stripe_size_4k; |
| |
| /* transaction number, modulo on-disk ring buffer wrap around */ |
| unsigned int t = b % mx; |
| |
| /* ... to aligned 4k on disk block */ |
| t = ((t % stripes) * stripe_size_4kB) + t/stripes; |
| |
| return t; |
| } |
| |
| |
| /* Expects the AL to be read into on_disk_buffer already. |
| * Returns negative error code for non-interpretable data, |
| * 0 for "just mark me clean, nothing more to do", |
| * and positive if we have to apply something. */ |
| int replay_al_84(struct format *cfg, uint32_t *hot_extent) |
| { |
| const unsigned int mx = cfg->md.al_stripes * cfg->md.al_stripe_size_4k; |
| struct al_4k_transaction_on_disk *al_disk = on_disk_buffer; |
| struct al_4k_cpu *al_cpu = NULL; |
| unsigned b, o, i; |
| |
| int found_valid = 0; |
| int found_valid_updates = 0; |
| struct al_cursor oldest = { 0, }; |
| struct al_cursor newest = { 0, }; |
| |
| al_cpu = calloc(mx, sizeof(*al_cpu)); |
| if (!al_cpu) { |
| fprintf(stderr, "Could not calloc(%u, sizeof(*al_cpu))\n", mx); |
| exit(30); /* FIXME sane exit codes */ |
| } |
| |
| /* endian convert, validate, and find oldest to newest log range */ |
| for (b = 0; b < mx; b++) { |
| o = al_tr_number_to_on_disk_slot(cfg, b, mx); |
| if (!v84_al_disk_to_cpu(al_cpu + b, al_disk + o)) |
| continue; |
| ++found_valid; |
| if (al_cpu[b].transaction_type == AL_TR_INITIALIZED) |
| continue; |
| d_expect(al_cpu[b].transaction_type == AL_TR_UPDATE); |
| if (++found_valid_updates == 1) { |
| oldest.i = b; |
| oldest.tr_number = al_cpu[b].tr_number; |
| newest = oldest; |
| continue; |
| } |
| d_expect(al_cpu[b].tr_number != oldest.tr_number); |
| d_expect(al_cpu[b].tr_number != newest.tr_number); |
| if ((int)al_cpu[b].tr_number - (int)oldest.tr_number < 0) { |
| d_expect(oldest.tr_number - al_cpu[b].tr_number + b - oldest.i == mx); |
| oldest.i = b; |
| oldest.tr_number = al_cpu[b].tr_number; |
| } |
| if ((int)al_cpu[b].tr_number - (int)newest.tr_number > 0) { |
| d_expect(al_cpu[b].tr_number - newest.tr_number == b - newest.i); |
| newest.i = b; |
| newest.tr_number = al_cpu[b].tr_number; |
| } |
| } |
| |
| if (!found_valid) { |
| /* not even one transaction was valid. |
| * Has this ever been initialized correctly? */ |
| fprintf(stderr, "No usable activity log found. Do you need to create-md?\n"); |
| free(al_cpu); |
| return -ENODATA; |
| } |
| |
| /* we do expect at most one corrupt transaction, and only in case |
| * things went wrong during transaction write. */ |
| if (found_valid != mx) |
| fprintf(stderr, "%u corrupt AL transactions found\n", mx - found_valid); |
| |
| if (!found_valid_updates) { |
| if (found_valid == mx) |
| /* nothing to do, all slots are valid AL_TR_INITIALIZED */ |
| return 0; |
| |
| /* this is only expected, in case the _first_ transaction |
| * somehow failed. */ |
| if (!al_cpu[0].is_valid && found_valid == mx - 1) |
| return 0; |
| |
| /* Hmm. Some transactions are valid. |
| * Some are not. |
| * This is not expected. */ |
| /* FIXME how do we want to handle this? */ |
| fprintf(stderr, "No valid AL update transaction found.\n"); |
| return -EINVAL; |
| } |
| |
| /* FIXME what do we do |
| * about more than one corrupt transaction? |
| * about corrupt transaction in the middle of the oldest -> newest range? */ |
| |
| /* Ok, so we found valid update transactions. Reconstruct the "active |
| * set" at the time of the newest transaction. */ |
| |
| /* wrap around */ |
| if (newest.i < oldest.i) |
| newest.i += mx; |
| |
| for (b = oldest.i; b <= newest.i; b++) { |
| unsigned idx = b % mx; |
| if (!al_cpu[idx].is_valid || al_cpu[idx].transaction_type == AL_TR_INITIALIZED) |
| continue; |
| |
| for (i = 0; i < AL_CONTEXT_PER_TRANSACTION; i++) { |
| unsigned slot = al_cpu[idx].context_start_slot_nr + i; |
| if (al_cpu[idx].context[i] == ~0U && slot >= al_cpu[idx].context_size) |
| continue; |
| if (slot >= AL_EXTENTS_MAX) { |
| fprintf(stderr, "slot number out of range: tr:%u slot:%u\n", |
| idx, slot); |
| continue; |
| } |
| hot_extent[slot] = al_cpu[idx].context[i]; |
| } |
| for (i = 0; i < AL_UPDATES_PER_TRANSACTION; i++) { |
| unsigned slot = al_cpu[idx].update_slot_nr[i]; |
| if (i >= al_cpu[idx].n_updates && slot == (uint16_t)(~0U)) |
| continue; |
| if (slot >= AL_EXTENTS_MAX) { |
| fprintf(stderr, "update slot number out of range: tr:%u slot:%u\n", |
| idx, slot); |
| continue; |
| } |
| hot_extent[slot] = al_cpu[idx].update_extent_nr[i]; |
| } |
| } |
| return found_valid_updates; |
| } |
| |
/*
 * qsort() style comparison of two unsigned values:
 * returns -1, 0 or 1 if *p1 is less than, equal to, or greater than *p2.
 */
int cmp_u32(const void *p1, const void *p2)
{
	const unsigned lhs = *(const unsigned *)p1;
	const unsigned rhs = *(const unsigned *)p2;

	/* how best to deal with 32bit wrap? */
	return (lhs > rhs) - (lhs < rhs);
}
| |
| void apply_al(struct format *cfg, uint32_t *hot_extent) |
| { |
| const unsigned int extents_size = BM_BYTES_PER_AL_EXT * cfg->md.max_peers; |
| const size_t bm_bytes = ALIGN(cfg->bm_bytes, cfg->md_hard_sect_size); |
| off_t bm_on_disk_off = cfg->bm_offset; |
| size_t bm_on_disk_pos = 0; |
| size_t chunk = 0; |
| int i, j; |
| |
| /* can only be AL_EXTENTS_MAX * BM_BYTES_PER_AL_EXT * 8, |
| * which currently is 65534 * 128 * 8 == 67106816 |
| * fits easily into 32bit. */ |
| unsigned additional_bits_set = 0; |
| uint64_t *w; |
| char ppb[10]; |
| |
| /* Now, actually apply this stuff to the on-disk bitmap. |
| * Since one AL extent corresponds to 128 bytes of bitmap, |
| * we need to do some read/modify/write cycles here. |
| * |
| * Note that this can be slow due to the use of O_DIRECT, |
| * worst case it does 65534 (AL_EXTENTS_MAX) cycles of |
| * - read 128 kByte (buffer_size) |
| * - memset 128 Bytes (BM_BYTES_PER_AL_EXT) to 0xff |
| * - write 128 kByte |
| * This implementation could optimized in various ways: |
| * - don't use direct IO; has other drawbacks |
| * - first scan hot_extents for extent ranges, |
| * and optimize the IO size. |
| * - use aio with multiple buffers |
| * - ... |
| */ |
| for (i = 0; i < AL_EXTENTS_MAX; i++) { |
| size_t bm_pos; |
| unsigned bits_set = 0; |
| if (hot_extent[i] == ~0U) |
| break; |
| |
| ASSERT(cfg->md.bm_bytes_per_bit == 4096); |
| ASSERT(BM_BYTES_PER_AL_EXT % 4 == 0); |
| |
| bm_pos = hot_extent[i] * extents_size; |
| if (bm_pos >= bm_bytes) { |
| fprintf(stderr, "extent %u beyond end of bitmap!\n", hot_extent[i]); |
| /* could break or return error here, |
| * but I'll just print a warning, and skip, each of them. */ |
| continue; |
| } |
| |
| /* On first iteration, or when the current position in the bitmap |
| * exceeds the current buffer, write out the current buffer, if any, |
| * and read in the next (at most buffer_size) chunk of bitmap, |
| * containing the currently processed bitmap region. |
| */ |
| |
| if (i == 0 || |
| bm_pos + extents_size >= bm_on_disk_pos + chunk) { |
| if (i != 0) |
| pwrite_or_die(cfg, on_disk_buffer, chunk, |
| bm_on_disk_off + bm_on_disk_pos, |
| "apply_al"); |
| |
| /* don't special case logical sector size != 512, |
| * operate in 4k always. */ |
| bm_on_disk_pos = bm_pos & ~(off_t)(4095); |
| chunk = bm_bytes - bm_on_disk_pos; |
| if (chunk > buffer_size) |
| chunk = buffer_size; |
| pread_or_die(cfg, on_disk_buffer, chunk, |
| bm_on_disk_off + bm_on_disk_pos, |
| "apply_al"); |
| } |
| ASSERT(bm_pos - bm_on_disk_pos <= chunk - extents_size); |
| ASSERT((bm_pos - bm_on_disk_pos) % sizeof(uint64_t) == 0); |
| w = (uint64_t *)on_disk_buffer |
| + (bm_pos - bm_on_disk_pos)/sizeof(uint64_t); |
| for (j = 0; j < extents_size/sizeof(uint64_t); j++) |
| bits_set += generic_hweight64(w[j]); |
| |
| additional_bits_set += extents_size * 8 - bits_set; |
| memset((char*)on_disk_buffer + (bm_pos - bm_on_disk_pos), |
| 0xff, extents_size); |
| } |
| /* we still need to write out the buffer of the last iteration */ |
| if (i != 0) { |
| pwrite_or_die(cfg, on_disk_buffer, chunk, |
| bm_on_disk_off + bm_on_disk_pos, |
| "apply_al"); |
| fprintf(stderr, "Marked additional %s as out-of-sync based on AL.\n", |
| ppsize(ppb, additional_bits_set * 4)); |
| } else |
| fprintf(stderr, "Nothing to do.\n"); |
| } |
| |
| int need_to_apply_al(struct format *cfg) |
| { |
| switch (format_version(cfg)) { |
| case DRBD_V06: |
| return 0; /* there was no activity log in 0.6 */ |
| case DRBD_V07: |
| return cfg->md.gc[Flags] & MDF_PRIMARY_IND; |
| case DRBD_V08: |
| case DRBD_V09: |
| return cfg->md.flags & MDF_PRIMARY_IND; |
| case DRBD_UNKNOWN: |
| fprintf(stderr, "BUG in %s().\n", __FUNCTION__); |
| } |
| return 0; |
| } |
| |
| int v08_move_internal_md_after_resize(struct format *cfg); |
/*
 * "apply-al" command: replay the activity log, mark the corresponding
 * bitmap regions as out-of-sync if necessary, re-initialize the activity
 * log, and update the flags in the "super block".
 *
 * @cfg:  format/device description
 * @argv: unused
 * @argc: number of additional arguments; a warning is printed if > 0
 *
 * Returns
 *   -1 if the format is older than 0.7, or no valid meta data was found,
 *    1 if the log could not be interpreted (-ENODATA) but would need
 *      to be applied,
 *    2 for any other replay error while the log would need to be applied,
 *    otherwise 0, or non-zero if updating the super block failed.
 */
int meta_apply_al(struct format *cfg, char **argv __attribute((unused)), int argc)
{
	off_t al_size;
	struct al_4k_transaction_on_disk *al_4k_disk = on_disk_buffer;
	/* extent number per AL slot; ~0U means "unused" */
	uint32_t hot_extent[AL_EXTENTS_MAX];
	int need_to_update_md_flags = 0;
	int re_initialize_anyways = 0;
	int err;

	if (argc > 0)
		fprintf(stderr, "Ignoring additional arguments\n");

	if (format_version(cfg) < DRBD_V07) {
		fprintf(stderr, "apply-al only implemented for DRBD >= 0.7\n");
		return -1;
	}

	err = cfg->ops->open(cfg);
	/* meta data only found at the last known location:
	 * try to move it, then open again */
	if (err == VALID_MD_FOUND_AT_LAST_KNOWN_LOCATION) {
		if (v08_move_internal_md_after_resize(cfg) == 0)
			err = cfg->ops->open(cfg);
	}
	if (err != VALID_MD_FOUND) {
		fprintf(stderr, "No valid meta data found\n");
		return -1;
	}

	al_size = cfg->md.al_stripes * cfg->md.al_stripe_size_4k * 4096;

	/* read in first chunk (which is actually the whole AL
	 * for old fixed size 32k activity log */
	pread_or_die(cfg, on_disk_buffer,
		al_size < buffer_size ? al_size : buffer_size,
		cfg->al_offset, "apply_al");

	/* init all extent numbers to -1U aka "unused" */
	memset(hot_extent, 0xff, sizeof(hot_extent));

	/* replay al */
	if (is_v07(cfg))
		err = replay_al_07(cfg, hot_extent);

	/* FIXME
	 * we should introduce a new meta data "super block" magic, so we won't
	 * have the same super block with two different activity log
	 * transaction layouts */
	else if (DRBD_MD_MAGIC_84_UNCLEAN == cfg->md.magic ||
		 DRBD_MD_MAGIC_09 == cfg->md.magic ||
		 DRBD_AL_MAGIC == be32_to_cpu(al_4k_disk[0].magic.be) ||
		 DRBD_AL_MAGIC == be32_to_cpu(al_4k_disk[1].magic.be) ||
		 cfg->md.al_stripes != 1 || cfg->md.al_stripe_size_4k != 8) {
		err = replay_al_84(cfg, hot_extent);
	} else {
		/* try the old al format anyways, this may be the first time we
		 * run after upgrading from < 8.4 to 8.4, and we need to
		 * transparently "convert" the activity log format. */
		err = replay_al_07(cfg, hot_extent);
		re_initialize_anyways = 1;
	}

	/* from here on, err is the replay result:
	 * < 0 error, 0 nothing to apply, > 0 something to apply */
	if (err < 0) {
		/* ENODATA:
		 * most likely this is an uninitialized,
		 * or at least non-8.4-style activity log.
		 * Cannot do anything about that.
		 *
		 * EINVAL:
		 * Some valid 8.4 style INITIALIZED transactions found,
		 * but others have been corrupt, and no single "usable"
		 * update transaction was found.
		 * FIXME: what to do about that?
		 * We probably need some "FORCE" mode as well.  */

		if (need_to_apply_al(cfg)) {
			/* 1, 2, 10, 20? FIXME sane exit codes! */
			if (err == -ENODATA)
				return 1;
			return 2;
		} else if (is_v08(cfg) || is_v09(cfg)) {
			fprintf(stderr, "Error ignored, no need to apply the AL\n");
			re_initialize_anyways = 1;
		}
	}

	/* do we need to actually apply it? */
	if (err > 0 && need_to_apply_al(cfg)) {
		/* process hot extents in order, to reduce disk seeks. */
		qsort(hot_extent, ARRAY_SIZE(hot_extent), sizeof(hot_extent[0]), cmp_u32);
		apply_al(cfg, hot_extent);
		need_to_update_md_flags = 1;
	}

	/* (Re-)initialize the activity log.
	 * This is needed on 8.4, and does not hurt on < 8.4.
	 * It may cause a "No usable activity log found" kernel message
	 * if it is attached to < 8.4, but that is cosmetic.
	 * We can skip this, if it was clean anyways (err == 0),
	 * or if we know that this is for 0.7.
	 */
	if (re_initialize_anyways || (err > 0 && !is_v07(cfg)))
		initialize_al(cfg);

	/* for v08 and later, also update the super block if it is not
	 * flagged MDF_AL_CLEAN yet, or does not carry the plain 08 magic */
	if (format_version(cfg) >= DRBD_V08 &&
	    ((cfg->md.flags & MDF_AL_CLEAN) == 0 ||
	     cfg->md.magic != DRBD_MD_MAGIC_08))
		need_to_update_md_flags = 1;

	/* the replay result has been consumed; err is the return value now */
	err = 0;
	if (need_to_update_md_flags) {
		/* Must not touch MDF_PRIMARY_IND.
		 * This flag is used in-kernel to determine which
		 * "wait-for-connection-timeout" is to be used.
		 * Maybe it is time to reconsider the concept or
		 * current implementation of "degr-wfc-timeout".
		 * RFC:
		 * If we set MDF_CRASHED_PRIMARY, in case MDF_PRIMARY_IND
		 * was set, and clear MDF_PRIMARY_IND here, we can then
		 * USE_DEGR_WFC_T as long as MDF_CRASHED_PRIMARY is set.
		 * Maybe that even results in better semantics.
		 */
		if (format_version(cfg) >= DRBD_V08)
			cfg->md.flags |= MDF_AL_CLEAN;
		if (is_v08(cfg))
			cfg->md.magic = DRBD_MD_MAGIC_08;

		/* always close, but report failure of either step */
		err = cfg->ops->md_cpu_to_disk(cfg);
		err = cfg->ops->close(cfg) || err;
		if (err)
			fprintf(stderr, "update of super block flags failed\n");
	}

	return err;
}
| |
| unsigned long bm_bytes(const struct md_cpu * const md, uint64_t sectors) |
| { |
| unsigned long long bm_bits; |
| unsigned long sectors_per_bit = md->bm_bytes_per_bit >> 9; |
| |
| /* we announced 1 PiB as "supported" iirc. */ |
| ASSERT(sectors <= (1ULL << (50-9))); |
| /* sectors_per_bit == 0 would trigger a division by zero. |
| * At some point we will want to store sectors_per_bit directly, |
| * and not bytes_per_bit. |
| * To keep sanity, we limit ourselves to tracking only power-of-two |
| * multiples of 4k */ |
| ASSERT(md->bm_bytes_per_bit >= 4096); |
| ASSERT((md->bm_bytes_per_bit & (md->bm_bytes_per_bit - 1)) == 0); |
| |
| /* round up storage sectors to full "bitmap sectors per bit", then |
| * convert to number of bits needed, and round that up to 64bit words |
| * to ease interoperability between 32bit and 64bit architectures. |
| */ |
| bm_bits = (sectors + sectors_per_bit -1)/sectors_per_bit; |
| bm_bits = ALIGN(bm_bits, 64); |
| |
| /* convert to bytes, multiply by number of peers, |
| * and, because we do all our meta data IO in 4k blocks, |
| * round up to full 4k |
| */ |
| return ALIGN(bm_bits / 8 * md->max_peers, 4096); |
| } |
| |
/*
 * Start a new line in the bitmap dump. If the low six bits of word
 * index @i equal @peer_nr, an additional "# at ...kB" position comment
 * is emitted first.
 */
static void fprintf_bm_eol(FILE *f, unsigned int i, int peer_nr, const char* indent)
{
	const int with_position = (i & 63) == peer_nr;

	if (!with_position) {
		fprintf(f, "\n%s ", indent);
		return;
	}
	fprintf(f, "\n%s # at %llukB\n%s ", indent, (128LLU * (i - peer_nr)), indent);
}
| |
/* Round @i down to the nearest multiple of @g; @g must not be zero. */
static unsigned int round_down(unsigned int i, unsigned int g)
{
	const unsigned int excess = i % g;

	/* same as i / g * g */
	return i - excess;
}
| |
/* Dump the on-disk bitmap of one peer to @f, as a "{ ... }" block.
 *
 * le_u64, because we want to be able to hexdump it reliably
 * regardless of sizeof(long)
 *
 * @f:       output stream
 * @cfg:     bitmap location (bm_offset), size (bm_bytes), and max_peers
 * @peer_nr: which peer's words to dump; the walk starts at 32bit word
 *           index peer_nr and advances in steps of max_peers
 * @indent:  prefix for each output line
 *
 * Words are printed as 64bit hex values (two 32bit words each); longer
 * runs of identical words are printed as "<n> times 0x...;".
 * The number of set bits seen is stored in cfg->bits_set.
 */
static void fprintf_bm(FILE *f, struct format *cfg, int peer_nr, const char* indent)
{
	const int WPL = 8;	/* words per output line */
	off_t bm_on_disk_off = cfg->bm_offset;
	le_u32 const *bm = on_disk_buffer;
	le_u32 cw; /* current word for rl encoding */
	le_u32 lw = {0}; /* low word for 64 bit output */
	const unsigned int n = cfg->bm_bytes/sizeof(*bm);	/* total number of 32bit words */
	unsigned int max_peers = cfg->md.max_peers;
	unsigned int count = 0;		/* length of the current run, in 32bit words */
	unsigned int bits_set = 0;
	unsigned int n_buffer = 0;	/* number of 32bit words in the current buffer */
	unsigned int r; /* real offset */
	unsigned int i; /* in-buffer offset */
	unsigned int j;

	/*
	 * The code below is a bit "funky" (ugly, unreadable, not only) with
	 * the modulos, and implicit offset modulo continuation on buffer wrap.
	 * To work, it requires the "chunk size" that is read-in per iteration
	 * to be a multiple of max_peer_size * 8 bytes, or it will be seriously
	 * confused on buffer wrap.
	 * IO-size should be a multiple of 4k anyways (because of O_DIRECT),
	 * align on 4k * max_peers seems to be an easy enough fix for said confusion.
	 * If you change buffer_size, double check this hackish reasoning as well. */
	const size_t max_chunk_size = round_down(buffer_size, 4096 * max_peers);

	ASSERT(buffer_size >= DRBD_PEERS_MAX * 4096);
	ASSERT(max_chunk_size);

	i = peer_nr;
	r = peer_nr;
	cw.le = 0; /* silence compiler warning */
	fprintf(f, "{");


	/* nothing is in the buffer yet:
	 * jump right into the "read next chunk" part of the loop body */
	if (r < n)
		goto start;

	while (r < n) {
		/* need to read on first iteration,
		 * and on buffer wrap */
		if (i * sizeof(*bm) >= max_chunk_size) {
			size_t chunk;
			/* continue at the corresponding offset in the next chunk */
			i -= max_chunk_size / sizeof(*bm);
start:
			chunk = ALIGN((n - round_down(r, max_peers)) * sizeof(*bm), cfg->md_hard_sect_size);
			if (chunk > max_chunk_size)
				chunk = max_chunk_size;
			ASSERT(chunk);
			pread_or_die(cfg, on_disk_buffer,
				chunk, bm_on_disk_off, "fprintf_bm");
			bm_on_disk_off += chunk;

			n_buffer = chunk / sizeof(*bm);
		}
next:
		ASSERT(i < n_buffer);
		/* not within a run: this word may start a new one */
		if (count == 0) cw = bm[i];
		/* run detection is only attempted at the start of an output line */
		if (i % (WPL * max_peers) == peer_nr) {
			if (!count)
				fprintf_bm_eol(f, r, peer_nr, indent);

			/* j = i, because it may be continuation after buffer wrap */
			for (j = i; j < n_buffer && cw.le == bm[j].le; j += max_peers)
				;
			/* number of identical words, in full output lines */
			unsigned int tmp = round_down(j / max_peers - i / max_peers, WPL);

			/* only runs longer than one line are run-length encoded */
			if (tmp > WPL) {
				count += tmp;
				r += tmp * max_peers;
				i += tmp * max_peers;
				/* run extends to the end of the buffer:
				 * it may continue in the next chunk */
				if (j >= n_buffer && r < n)
					continue;
			}
			if (count) {
				fprintf(f, " %u times 0x%08X%08X;",
					count / 2, le32_to_cpu(cw.le), le32_to_cpu(cw.le));
				bits_set += count * generic_hweight32(cw.le);
				count = 0;
				if (r >= n)
					break;
				/* don't "continue;", we may have not advanced i after buffer wrap,
				 * so that would be treated as an other buffer wrap */
				goto next;
			}
		}
		ASSERT(i < n_buffer);
		/* remember the low word; print once the high word is known */
		if (((i / max_peers) & 1) == 0)
			lw = bm[i];
		else
			fprintf(f, " 0x%08X%08X;", le32_to_cpu(bm[i].le), le32_to_cpu(lw.le));
		bits_set += generic_hweight32(bm[i].le);
		r += max_peers;
		i += max_peers;
	}
	fprintf(f, "\n%s}\n", indent);
	cfg->bits_set = bits_set;
}
| |
| void printf_bm(struct format *cfg) |
| { |
| int i; |
| |
| switch (format_version(cfg)) { |
| case DRBD_V06: |
| return; |
| case DRBD_V07: |
| case DRBD_V08: |
| printf("bm "); |
| fprintf_bm(stdout, cfg, 0, ""); |
| break; |
| case DRBD_V09: |
| for (i = 0; i < cfg->md.max_peers; i++) { |
| printf("bitmap[%d] ", i); |
| fprintf_bm(stdout, cfg, i, ""); |
| } |
| break; |
| case DRBD_UNKNOWN: |
| fprintf(stderr, "BUG in %s().\n", __FUNCTION__); |
| } |
| } |
| |
| static void clip_effective_size_and_bm_bytes(struct format *cfg) |
| { |
| if (cfg->md.effective_size > cfg->max_usable_sect) { |
| printf("# la-size-sect was too big (%llu), truncated (%llu)!\n", |
| (unsigned long long)cfg->md.effective_size, |
| (unsigned long long)cfg->max_usable_sect); |
| cfg->md.effective_size = cfg->max_usable_sect; |
| } |
| cfg->bm_bytes = bm_bytes(&cfg->md, cfg->md.effective_size); |
| } |
| |
/*
 * Open the meta data device, determine its sector size and size,
 * compute the meta data offset, and try to read the meta data.
 *
 * Returns VALID_MD_FOUND if cfg->ops->md_disk_to_cpu() succeeded, in
 * which case al_offset, bm_offset, max_usable_sect and bm_bytes have
 * been set up as well; NO_VALID_MD_FOUND otherwise.
 * Calls exit() if the device cannot be opened or stat'ed, is not a block
 * device (unless forced), or is too small.
 */
int v07_style_md_open(struct format *cfg)
{
	struct stat sb;
	unsigned int hard_sect_size = 0;
	int ioctl_err;
	int open_flags = O_RDWR | O_DIRECT;

	/* For old-style fixed size indexed external meta data,
	 * we cannot really use O_EXCL, we have to trust the given minor.
	 *
	 * For internal, or "flexible" external meta data, we open O_EXCL to
	 * avoid accidentally damaging otherwise in-use data, just because
	 * someone had a typo in the command line.
	 */
	if (cfg->md_index < 0)
		open_flags |= O_EXCL;

 retry:
	cfg->md_fd = open(cfg->md_device_name, open_flags );

	if (cfg->md_fd == -1) {
		/* PERROR may clobber errno */
		int save_errno = errno;
		PERROR("open(%s) failed", cfg->md_device_name);
		if (save_errno == EBUSY && (open_flags & O_EXCL)) {
			/* apply-al gives up here unless forced;
			 * everything else asks for confirmation */
			if ((!force && command->function == &meta_apply_al) ||
			    !confirmed("Exclusive open failed. Do it anyways?"))
			{
				printf("Operation canceled.\n");
				exit(20);
			}
			open_flags &= ~O_EXCL;
			goto retry;
		}
		if (save_errno == EINVAL && (open_flags & O_DIRECT)) {
			/* shoo. O_DIRECT is not supported?
			 * retry, but remember this, so we can
			 * BLKFLSBUF appropriately */
			fprintf(stderr, "could not open with O_DIRECT, retrying without\n");
			open_flags &= ~O_DIRECT;
			opened_odirect = 0;
			goto retry;
		}
		exit(20);
	}

	if (fstat(cfg->md_fd, &sb)) {
		PERROR("fstat(%s) failed", cfg->md_device_name);
		exit(20);
	}

	if (!S_ISBLK(sb.st_mode)) {
		if (!force) {
			fprintf(stderr, "'%s' is not a block device!\n",
				cfg->md_device_name);
			exit(20);
		}
		/* forced to use something that is not a block device:
		 * take the size from fstat() */
		cfg->bd_size = sb.st_size;
	}

	if (format_version(cfg) >= DRBD_V08) {
		ASSERT(cfg->md_index != DRBD_MD_INDEX_INTERNAL);
	}
	ioctl_err = ioctl(cfg->md_fd, BLKSSZGET, &hard_sect_size);
	if (ioctl_err) {
		fprintf(stderr, "ioctl(md_fd, BLKSSZGET) returned %d, "
			"assuming hard_sect_size is 512 Byte\n", ioctl_err);
		cfg->md_hard_sect_size = 512;
	} else {
		cfg->md_hard_sect_size = hard_sect_size;
		if (verbose >= 2)
			fprintf(stderr, "hard_sect_size is %d Byte\n",
				cfg->md_hard_sect_size);
	}

	/* not set from fstat() above: ask the block device */
	if (!cfg->bd_size)
		cfg->bd_size = bdev_size(cfg->md_fd);
	/* check_for_existing_data() wants to read that much,
	 * so having less than that doesn't make sense.
	 * It's only 68kB anyway! */
	if (cfg->bd_size < SO_MUCH) {
		fprintf(stderr, "%s is only %llu bytes. That's not enough.\n",
			cfg->md_device_name, (long long unsigned)cfg->bd_size);
		exit(10);
	}
	cfg->md_offset =
		v07_style_md_get_byte_offset(cfg->md_index, cfg->bd_size);
	/* the 4k meta data block has to fit between md_offset and end of device */
	if (cfg->md_offset > cfg->bd_size - 4096) {
		fprintf(stderr,
			"Device too small: expecting meta data block at\n"
			"byte offset %lld, but %s is only %llu bytes.\n",
			(signed long long)cfg->md_offset,
			cfg->md_device_name,
			(long long unsigned)cfg->bd_size);
		exit(10);
	}

	/* without O_DIRECT, flush the buffer cache of the device first
	 * (not attempted for ram disks) */
	if (!opened_odirect &&
	    (MAJOR(sb.st_rdev) != RAMDISK_MAJOR)) {
		ioctl_err = ioctl(cfg->md_fd, BLKFLSBUF);
		/* report error, but otherwise ignore.  we could not open
		 * O_DIRECT, it is a "strange" device anyways. */
		if (ioctl_err)
			fprintf(stderr, "ioctl(md_fd, BLKFLSBUF) returned %d, "
					"we may read stale data\n", ioctl_err);
	}

	if (cfg->ops->md_disk_to_cpu(cfg)) {
		/* no valid meta data found.  but we want to initialize
		 * al_offset and bm_offset anyways, so check_for_existing_data
		 * has something to work with. */
		/* NOTE(review): al_offset and bm_offset are NOT initialized
		 * on this path; they are only set below. */
		return NO_VALID_MD_FOUND;
	}

	/* md.al_offset and md.bm_offset are in units of 512 byte sectors,
	 * relative to the meta data offset */
	cfg->al_offset = cfg->md_offset + cfg->md.al_offset * 512LL;
	cfg->bm_offset = cfg->md_offset + cfg->md.bm_offset * 512LL;
	cfg->max_usable_sect = max_usable_sectors(cfg);
	clip_effective_size_and_bm_bytes(cfg);

	/* number of set bits is not known yet */
	cfg->bits_set = -1U;

	/* FIXME paranoia verify that unused bits and words are unset... */

	return VALID_MD_FOUND;
}
| |
| int v07_md_disk_to_cpu(struct format *cfg) |
| { |
| struct md_cpu md; |
| int ok; |
| PREAD(cfg, on_disk_buffer, sizeof(struct md_on_disk_07), cfg->md_offset); |
| md_disk_07_to_cpu(&md, (struct md_on_disk_07*)on_disk_buffer); |
| ok = is_valid_md(DRBD_V07, &md, cfg->md_index, cfg->bd_size); |
| if (ok) |
| cfg->md = md; |
| return ok ? 0 : -1; |
| } |
| |
| int v07_md_cpu_to_disk(struct format *cfg) |
| { |
| if (!is_valid_md(DRBD_V07, &cfg->md, cfg->md_index, cfg->bd_size)) |
| return -1; |
| md_cpu_to_disk_07(on_disk_buffer, &cfg->md); |
| PWRITE(cfg, on_disk_buffer, sizeof(struct md_on_disk_07), cfg->md_offset); |
| return 0; |
| } |
| |
| int v07_parse(struct format *cfg, char **argv, int argc, int *ai) |
| { |
| long index; |
| char *e; |
| |
| if (argc < 2) { |
| fprintf(stderr, "Too few arguments for format\n"); |
| return -1; |
| } |
| |
| cfg->md_device_name = strdup(argv[0]); |
| if (!strcmp(argv[1],"internal")) { |
| index = |
| is_v07(cfg) ? DRBD_MD_INDEX_INTERNAL |
| : DRBD_MD_INDEX_FLEX_INT; |
| } else if (!strcmp(argv[1],"flex-external")) { |
| index = DRBD_MD_INDEX_FLEX_EXT; |
| } else if (!strcmp(argv[1],"flex-internal")) { |
| index = DRBD_MD_INDEX_FLEX_INT; |
| } else { |
| e = argv[1]; |
| errno = 0; |
| index = strtol(argv[1], &e, 0); |
| if (*e != 0 || 0 > index || index > 255 || errno != 0) { |
| fprintf(stderr, "'%s' is not a valid index number.\n", argv[1]); |
| return -1; |
| } |
| } |
| cfg->md_index = index; |
| |
| *ai += 2; |
| |
| return 0; |
| } |
| |
| int v07_md_initialize(struct format *cfg, int do_disk_writes, |
| int max_peers __attribute((unused))) |
| { |
| memset(&cfg->md, 0, sizeof(cfg->md)); |
| |
| cfg->md.effective_size = 0; |
| cfg->md.gc[Flags] = 0; |
| cfg->md.gc[HumanCnt] = 1; /* THINK 0? 1? */ |
| cfg->md.gc[TimeoutCnt] = 1; |
| cfg->md.gc[ConnectedCnt] = 1; |
| cfg->md.gc[ArbitraryCnt] = 1; |
| cfg->md.max_peers = 1; |
| cfg->md.magic = DRBD_MD_MAGIC_07; |
| /* No striping in v07! |
| * But some parts of the common code expect these members to be properly initialized. */ |
| cfg->md.al_stripes = 1; |
| cfg->md.al_stripe_size_4k = 8; |
| |
| return md_initialize_common(cfg, do_disk_writes); |
| } |
| |
| /****************************************** |
| }}} end of v07 |
| ******************************************/ |
| /****************************************** |
| begin of v08 {{{ |
| ******************************************/ |
| |
| /* if this returns with something != 0 in cfg->lk_bd.bd_size, |
| * caller knows he must move the meta data to actually find it. */ |
| void v08_check_for_resize(struct format *cfg) |
| { |
| struct md_cpu md_test; |
| off_t flex_offset; |
| int found = 0; |
| |
| /* you should not call me if you already found something. */ |
| ASSERT(cfg->md.magic == 0); |
| |
| /* check for resized lower level device ... only check for drbd 8 */ |
| if (format_version(cfg) < DRBD_V08) |
| return; |
| if (cfg->md_index != DRBD_MD_INDEX_FLEX_INT) |
| return; |
| |
| /* Do we know anything? Maybe it never was stored. */ |
| if (lk_bdev_load(cfg->minor, &cfg->lk_bd)) { |
| if (verbose) |
| fprintf(stderr, "no last-known offset information available.\n"); |
| return; |
| } |
| |
| if (verbose) { |
| fprintf(stderr, " last known info: %llu %s\n", |
| (unsigned long long)cfg->lk_bd.bd_size, |
| cfg->lk_bd.bd_name ?: "-unknown device name-"); |
| if (cfg->lk_bd.bd_uuid) |
| fprintf(stderr, " last known uuid: "X64(016)"\n", |
| cfg->lk_bd.bd_uuid); |
| } |
| |
| /* I just checked that offset, nothing to see there. */ |
| if (cfg->lk_bd.bd_size == cfg->bd_size) |
| return; |
| |
| flex_offset = v07_style_md_get_byte_offset( |
| DRBD_MD_INDEX_FLEX_INT, cfg->lk_bd.bd_size); |
| |
| /* actually check that offset, if it is accessible. */ |
| /* If someone shrunk that device, I won't be able to read it! */ |
| if (flex_offset < cfg->bd_size) { |
| PREAD(cfg, on_disk_buffer, 4096, flex_offset); |
| if (is_v08(cfg)) { |
| md_disk_08_to_cpu(&md_test, (struct md_on_disk_08*)on_disk_buffer); |
| found = is_valid_md(DRBD_V08, &md_test, DRBD_MD_INDEX_FLEX_INT, cfg->lk_bd.bd_size); |
| } else if (is_v09(cfg)) { |
| md_disk_09_to_cpu(&md_test, (struct meta_data_on_disk_9*)on_disk_buffer); |
| found = is_valid_md(DRBD_V09, &md_test, DRBD_MD_INDEX_FLEX_INT, cfg->lk_bd.bd_size); |
| } |
| } |
| |
| if (verbose) { |
| fprintf(stderr, "While checking for internal meta data for drbd%u on %s,\n" |
| "it appears that it may have been relocated.\n" |
| "It used to be ", cfg->minor, cfg->md_device_name); |
| if (cfg->lk_bd.bd_name && |
| strcmp(cfg->lk_bd.bd_name, cfg->md_device_name)) { |
| fprintf(stderr, "on %s ", cfg->lk_bd.bd_name); |
| } |
| fprintf(stderr, "at byte offset %llu", (unsigned long long)flex_offset); |
| |
| if (!found) { |
| fprintf(stderr, ", but I cannot find it now.\n"); |
| if (flex_offset >= cfg->bd_size) |
| fprintf(stderr, "Device is too small now!\n"); |
| } else |
| fprintf(stderr, ", and seems to still be valid.\n"); |
| } |
| |
| if (found) { |
| if (cfg->lk_bd.bd_uuid && md_test.device_uuid != cfg->lk_bd.bd_uuid) { |
| fprintf(stderr, "Last known and found uuid differ!?\n" |
| X64(016)" != "X64(016)"\n", |
| cfg->lk_bd.bd_uuid, cfg->md.device_uuid); |
| if (!force) { |
| found = 0; |
| fprintf(stderr, "You may --force me to ignore that.\n"); |
| } else |
| fprintf(stderr, "You --force'ed me to ignore that.\n"); |
| } |
| } |
| if (found) |
| cfg->md = md_test; |
| return; |
| } |
| |
| int v08_md_open(struct format *cfg) { |
| int r = v07_style_md_open(cfg); |
| if (r == VALID_MD_FOUND) |
| return r; |
| |
| v08_check_for_resize(cfg); |
| if (!cfg->lk_bd.bd_size || !cfg->md.magic) |
| return NO_VALID_MD_FOUND; |
| else |
| return VALID_MD_FOUND_AT_LAST_KNOWN_LOCATION; |
| } |
| |
| int v08_md_disk_to_cpu(struct format *cfg) |
| { |
| struct md_cpu md; |
| int ok; |
| PREAD(cfg, on_disk_buffer, sizeof(struct md_on_disk_08), cfg->md_offset); |
| md_disk_08_to_cpu(&md, (struct md_on_disk_08*)on_disk_buffer); |
| ok = is_valid_md(DRBD_V08, &md, cfg->md_index, cfg->bd_size); |
| if (ok) |
| cfg->md = md; |
| if (verbose >= 3 + !!ok && verbose <= 10) |
| fprintf_hex(stderr, cfg->md_offset, on_disk_buffer, 4096); |
| return ok ? 0 : -1; |
| } |
| |
| int v08_md_cpu_to_disk(struct format *cfg) |
| { |
| if (!is_valid_md(DRBD_V08, &cfg->md, cfg->md_index, cfg->bd_size)) |
| return -1; |
| md_cpu_to_disk_08((struct md_on_disk_08 *)on_disk_buffer, &cfg->md); |
| PWRITE(cfg, on_disk_buffer, sizeof(struct md_on_disk_08), cfg->md_offset); |
| cfg->update_lk_bdev = 1; |
| return 0; |
| } |
| |
| int v08_md_initialize(struct format *cfg, int do_disk_writes, |
| int max_peers __attribute((unused))) |
| { |
| size_t i; |
| |
| memset(&cfg->md, 0, sizeof(cfg->md)); |
| |
| cfg->md.effective_size = 0; |
| cfg->md.current_uuid = UUID_JUST_CREATED; |
| cfg->md.peers[0].bitmap_uuid = 0; |
| for (i = 0; i < ARRAY_SIZE(cfg->md.history_uuids); i++) |
| cfg->md.history_uuids[i] = 0; |
| cfg->md.flags = MDF_AL_CLEAN; |
| cfg->md.max_peers = 1; |
| cfg->md.magic = DRBD_MD_MAGIC_08; |
| cfg->md.al_stripes = option_al_stripes; |
| cfg->md.al_stripe_size_4k = option_al_stripe_size_4k; |
| |
| return md_initialize_common(cfg, do_disk_writes); |
| } |
| |
| int v08_md_close(struct format *cfg) |
| { |
| /* update last known info, if we changed anything, |
| * or if explicitly requested. */ |
| if (cfg->update_lk_bdev && !dry_run) { |
| if (cfg->md_index != DRBD_MD_INDEX_FLEX_INT) |
| lk_bdev_delete(cfg->minor); |
| else { |
| cfg->lk_bd.bd_size = cfg->bd_size; |
| cfg->lk_bd.bd_uuid = cfg->md.device_uuid; |
| cfg->lk_bd.bd_name = cfg->md_device_name; |
| lk_bdev_save(cfg->minor, &cfg->lk_bd); |
| } |
| } |
| return generic_md_close(cfg); |
| } |
| |
| /****************************************** |
| begin of v09 {{{ |
| ******************************************/ |
| int v09_md_disk_to_cpu(struct format *cfg) |
| { |
| struct md_cpu md; |
| int ok; |
| PREAD(cfg, on_disk_buffer, sizeof(struct meta_data_on_disk_9), cfg->md_offset); |
| md_disk_09_to_cpu(&md, (struct meta_data_on_disk_9*)on_disk_buffer); |
| ok = is_valid_md(DRBD_V09, &md, cfg->md_index, cfg->bd_size); |
| if (ok) |
| cfg->md = md; |
| if (verbose >= 3 + !!ok && verbose <= 10) |
| fprintf_hex(stderr, cfg->md_offset, on_disk_buffer, 4096); |
| return ok ? 0 : -1; |
| } |
| |
| int v09_md_cpu_to_disk(struct format *cfg) |
| { |
| if (!is_valid_md(DRBD_V09, &cfg->md, cfg->md_index, cfg->bd_size)) |
| return -1; |
| md_cpu_to_disk_09((struct meta_data_on_disk_9 *)on_disk_buffer, &cfg->md); |
| PWRITE(cfg, on_disk_buffer, sizeof(struct meta_data_on_disk_9), cfg->md_offset); |
| cfg->update_lk_bdev = 1; |
| return 0; |
| } |
| |
| int v09_md_initialize(struct format *cfg, int do_disk_writes, int max_peers) |
| { |
| int p, i; |
| |
| memset(&cfg->md, 0, sizeof(cfg->md)); |
| |
| cfg->md.effective_size = 0; |
| cfg->md.max_peers = max_peers; |
| cfg->md.flags = MDF_AL_CLEAN; |
| cfg->md.node_id = -1; |
| cfg->md.magic = DRBD_MD_MAGIC_09; |
| cfg->md.al_stripes = option_al_stripes; |
| cfg->md.al_stripe_size_4k = option_al_stripe_size_4k; |
| |
| cfg->md.current_uuid = UUID_JUST_CREATED; |
| for (i = 0; i < ARRAY_SIZE(cfg->md.history_uuids); i++) |
| cfg->md.history_uuids[i] = 0; |
| |
| for (p = 0; p < DRBD_NODE_ID_MAX; p++) { |
| cfg->md.peers[p].bitmap_uuid = 0; |
| cfg->md.peers[p].flags = 0; |
| cfg->md.peers[p].bitmap_index = -1; |
| } |
| |
| return md_initialize_common(cfg, do_disk_writes); |
| } |
| |
| /****************************************** |
| }}} end of v09 |
| ******************************************/ |
| |
| int meta_get_gi(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| if (cfg->ops->open(cfg)) |
| return -1; |
| |
| cfg->ops->get_gi(&cfg->md, option_node_id); |
| |
| return cfg->ops->close(cfg); |
| } |
| |
| int meta_show_gi(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| char ppb[10]; |
| |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| if (cfg->ops->open(cfg)) |
| return -1; |
| |
| // find the correct slot from node-id. |
| |
| cfg->ops->show_gi(&cfg->md, option_node_id); |
| |
| if (cfg->md.effective_size) { |
| printf("last agreed size: %s (%llu sectors)\n", |
| ppsize(ppb, cfg->md.effective_size >> 1), |
| (unsigned long long)cfg->md.effective_size); |
| printf("last agreed max bio size: %u Byte\n", |
| cfg->md.la_peer_max_bio_size); |
| #if 0 |
| /* FIXME implement count_bits() */ |
| printf("%u bits set in the bitmap [ %s out of sync ]\n", |
| cfg->bits_set, ppsize(ppb, cfg->bits_set * 4)); |
| #endif |
| } else { |
| printf("zero size device -- never seen peer yet?\n"); |
| } |
| |
| return cfg->ops->close(cfg); |
| } |
| |
| int meta_dstate(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| if (cfg->ops->open(cfg)) { |
| fprintf(stderr, "No valid meta data found\n"); |
| return -1; |
| } |
| |
| if(cfg->md.flags & MDF_CONSISTENT) { |
| if(cfg->md.flags & MDF_WAS_UP_TO_DATE) { |
| if (cfg->md.flags & MDF_PEER_OUT_DATED) |
| printf("UpToDate\n"); |
| else |
| printf("Consistent\n"); |
| } else { |
| printf("Outdated\n"); |
| } |
| } else { |
| printf("Inconsistent\n"); |
| } |
| |
| return cfg->ops->close(cfg); |
| } |
| |
| int meta_set_gi(struct format *cfg, char **argv, int argc) |
| { |
| struct md_cpu tmp; |
| int err; |
| |
| if (argc > 1) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| if (argc < 1) { |
| fprintf(stderr, "Required Argument missing\n"); |
| exit(10); |
| } |
| |
| if (cfg->ops->open(cfg)) |
| return -1; |
| |
| tmp = cfg->md; |
| cfg->ops->set_gi(&tmp, option_node_id, argv, argc); |
| printf("previously "); |
| cfg->ops->get_gi(&cfg->md, option_node_id); |
| printf("set GI to "); |
| cfg->ops->get_gi(&tmp, option_node_id); |
| |
| if (!confirmed("Write new GI to disk?")) { |
| printf("Operation canceled.\n"); |
| exit(0); |
| } |
| |
| cfg->md = tmp; |
| |
| err = cfg->ops->md_cpu_to_disk(cfg); |
| err = cfg->ops->close(cfg) || err; |
| if (err) |
| fprintf(stderr, "update failed\n"); |
| |
| return err; |
| } |
| |
| void print_dump_header() |
| { |
| char time_str[60]; |
| time_t t = time(NULL); |
| int i; |
| |
| strftime(time_str, sizeof(time_str), "%F %T %z [%s]", localtime(&t)); |
| printf("# DRBD meta data dump\n# %s\n# %s>", |
| time_str, get_hostname()); |
| |
| for (i=0; i < global_argc; i++) |
| printf(" %s",global_argv[i]); |
| printf("\n#\n\n"); |
| } |
| |
/*
 * Render the set bits of a peer flags word as human readable text.
 *
 * inbuf:     destination buffer; left untouched if it is NULL or
 *            buf_size is 0
 * buf_size:  size of inbuf in bytes
 * flags:     bit mask to describe
 * first_sep: text put in front of the first flag name
 * sep:       text put in front of every further flag name
 *
 * Bits without a known name are rendered as "0x<bit>=?".
 * If the text does not fit, the output is truncated, but it is always
 * NUL terminated (given buf_size > 0).
 *
 * Returns inbuf.
 */
char *pretty_peer_md_flags(char *inbuf, unsigned int buf_size, unsigned int flags, const char *first_sep, const char *sep)
{
	static const char *const flag_name[32] = {
		/* MDF_PEER_CONNECTED */	[0] = "connected",
		/* MDF_PEER_OUTDATED */		[1] = "<=outdated",
		/* MDF_PEER_FENCING */		[2] = "fencing",
		/* MDF_PEER_FULL_SYNC */	[3] = "full-sync",
		/* MDF_PEER_DEVICE_SEEN */	[4] = "seen",
		/* MDF_NODE_EXISTS */		[16] = "exists",
	};
	const char *cur_sep = first_sep;
	char *pos = inbuf;
	/* size_t, not int: a huge buf_size must not turn into a negative count */
	size_t left = buf_size;
	unsigned int bit;

	/* no room for even the terminating NUL: do not write anything */
	if (inbuf == NULL || buf_size == 0)
		return inbuf;

	*pos = '\0';
	for (bit = 0; bit < 32; bit++) {
		const unsigned int mask = 1U << bit;
		int len;

		if ((flags & mask) == 0)
			continue;

		if (flag_name[bit])
			len = snprintf(pos, left, "%s%s", cur_sep, flag_name[bit]);
		else
			len = snprintf(pos, left, "%s0x%x=?", cur_sep, mask);
		cur_sep = sep;

		/* error or truncation: keep what fit, stop here */
		if (len < 0 || (size_t)len >= left)
			break;
		pos += len;
		left -= (size_t)len;
	}

	return inbuf;
}
| |
| int meta_dump_md(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| int al_is_clean; |
| int i; |
| |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| i = cfg->ops->open(cfg); |
| if (i == NO_VALID_MD_FOUND) { |
| fprintf(stderr, "No valid meta data found\n"); |
| return -1; |
| } |
| |
| al_is_clean = |
| DRBD_MD_MAGIC_84_UNCLEAN != cfg->md.magic && |
| (cfg->md.flags & MDF_AL_CLEAN) != 0; |
| |
| if (!al_is_clean) { |
| fprintf(stderr, "Found meta data is \"unclean\", please apply-al first\n"); |
| if (!force) |
| return -1; |
| } |
| |
| print_dump_header(); |
| printf("version \"%s\";\n\n", cfg->ops->name); |
| |
| if (!al_is_clean) |
| /* So we have been forced. Still cause a parse error for restore-md. */ |
| printf("This_is_an_unclean_meta_data_dump._Don't_trust_the_bitmap.\n" |
| "# You should \"apply-al\" first, if you plan to restore this.\n\n"); |
| |
| if (format_version(cfg) >= DRBD_V09) |
| printf("max-peers %d;\n", cfg->md.max_peers); |
| printf("# md_size_sect %llu\n", (long long unsigned)cfg->md.md_size_sect); |
| |
| if (i == VALID_MD_FOUND_AT_LAST_KNOWN_LOCATION) { |
| printf("#\n" |
| "### Device seems to have been resized!\n" |
| "### dumping meta data from the last known position\n" |
| "### current size of %s: %llu byte\n" |
| "### expected position of meta data:\n", |
| cfg->md_device_name, (unsigned long long)cfg->bd_size); |
| |
| printf("## md_offset %llu\n", (long long unsigned)cfg->md_offset); |
| printf("## al_offset %llu\n", (long long unsigned)cfg->al_offset); |
| printf("## bm_offset %llu\n", (long long unsigned)cfg->bm_offset); |
| |
| printf( |
| "### last known size of %s: %llu byte\n" |
| "### adjusted position of meta data:\n", |
| cfg->lk_bd.bd_name ?: "-?-", |
| (unsigned long long)cfg->lk_bd.bd_size); |
| |
| cfg->md_offset = v07_style_md_get_byte_offset( |
| DRBD_MD_INDEX_FLEX_INT, cfg->lk_bd.bd_size); |
| |
| cfg->al_offset = cfg->md_offset + cfg->md.al_offset * 512LL; |
| cfg->bm_offset = cfg->md_offset + cfg->md.bm_offset * 512LL; |
| cfg->bm_bytes = bm_bytes(&cfg->md, cfg->md.effective_size); |
| } |
| printf("# md_offset %llu\n", (long long unsigned)cfg->md_offset); |
| printf("# al_offset %llu\n", (long long unsigned)cfg->al_offset); |
| printf("# bm_offset %llu\n", (long long unsigned)cfg->bm_offset); |
| printf("\n"); |
| |
| switch (format_version(cfg)) { |
| case DRBD_V06: |
| case DRBD_V07: |
| printf("gc {\n "); |
| for (i = 0; i < GEN_CNT_SIZE; i++) { |
| printf(" %d;", cfg->md.gc[i]); |
| } |
| printf("\n}\n"); |
| break; |
| case DRBD_V08: |
| printf("uuid {\n"); |
| printf(" 0x"X64(016)"; 0x"X64(016)"; 0x"X64(016)"; 0x"X64(016)";\n", |
| cfg->md.current_uuid, |
| cfg->md.peers[0].bitmap_uuid, |
| cfg->md.history_uuids[0], |
| cfg->md.history_uuids[1]); |
| printf(" flags 0x"X32(08)";\n", cfg->md.peers[0].flags); |
| printf("}\n"); |
| break; |
| case DRBD_V09: |
| printf("node-id %d;\n" |
| "current-uuid 0x"X64(016)";\n" |
| "flags 0x"X32(08)";\n", |
| cfg->md.node_id, |
| cfg->md.current_uuid, cfg->md.flags); |
| for (i = 0; i < DRBD_NODE_ID_MAX; i++) { |
| struct peer_md_cpu *peer = &cfg->md.peers[i]; |
| char flag_buf[80]; |
| |
| printf("peer[%d] {\n", i); |
| if (format_version(cfg) >= DRBD_V09) { |
| printf(" bitmap-index %d;\n", |
| peer->bitmap_index); |
| } |
| printf(" bitmap-uuid 0x"X64(016)";\n" |
| " bitmap-dagtag 0x"X64(016)";\n" |
| " flags 0x"X32(08)";%s\n", |
| peer->bitmap_uuid, |
| peer->bitmap_dagtag, |
| peer->flags, |
| pretty_peer_md_flags(flag_buf, sizeof(flag_buf), |
| peer->flags, " # ", " | ")); |
| printf("}\n"); |
| } |
| printf("history-uuids {"); |
| for (i = 0; i < ARRAY_SIZE(cfg->md.history_uuids); i++) |
| printf("%s0x"X64(016)";", |
| i % 4 ? " " : "\n ", |
| cfg->md.history_uuids[i]); |
| printf("\n}\n"); |
| break; |
| case DRBD_UNKNOWN: |
| fprintf(stderr, "BUG in %s().\n", __FUNCTION__); |
| } |
| |
| if (format_version(cfg) >= DRBD_V07) { |
| printf("# al-extents %u;\n", cfg->md.al_nr_extents); |
| printf("la-size-sect "U64";\n", cfg->md.effective_size); |
| if (format_version(cfg) >= DRBD_V08) { |
| printf("bm-byte-per-bit "U32";\n", |
| cfg->md.bm_bytes_per_bit); |
| printf("device-uuid 0x"X64(016)";\n", |
| cfg->md.device_uuid); |
| printf("la-peer-max-bio-size %d;\n", |
| cfg->md.la_peer_max_bio_size); |
| printf("al-stripes "U32";\n", |
| cfg->md.al_stripes); |
| printf("al-stripe-size-4k "U32";\n", |
| cfg->md.al_stripe_size_4k); |
| } |
| printf("# bm-bytes %u;\n", cfg->bm_bytes); |
| printf_bm(cfg); /* pretty prints the whole bitmap */ |
| printf("# bits-set %u;\n", cfg->bits_set); |
| |
| /* This is half assed, still. Hide it. */ |
| if (verbose >= 10) |
| printf_al(cfg); |
| } |
| |
| return cfg->ops->close(cfg); |
| } |
| |
| void md_parse_error(int expected_token, int seen_token,const char *etext) |
| { |
| if (!etext) { |
| switch(expected_token) { |
| /* leading space indicates to strip off "expected" below */ |
| default : etext = " invalid/unexpected token!"; break; |
| case 0 : etext = "end of file"; break; |
| case ';': etext = "semicolon (;)"; break; |
| case '{': etext = "opening brace ({)"; break; |
| case '}': etext = "closing brace (})"; break; |
| case '[': etext = "opening bracket ([)"; break; |
| case ']': etext = "closing bracket (])"; break; |
| case TK_BM: |
| etext = "keyword 'bm'"; break; |
| case TK_BITMAP: |
| etext = "keyword 'bitmap'"; break; |
| case TK_BM_BYTE_PER_BIT: |
| etext = "keyword 'bm-byte-per-bit'"; break; |
| case TK_DEVICE_UUID: |
| etext = "keyword 'device-uuid'"; break; |
| case TK_FLAGS: |
| etext = "keyword 'flags'"; break; |
| case TK_GC: |
| etext = "keyword 'gc'"; break; |
| case TK_LA_SIZE: |
| etext = "keyword 'la-size-sect'"; break; |
| case TK_TIMES: |
| etext = "keyword 'times'"; break; |
| case TK_UUID: |
| etext = "keyword 'uuid'"; break; |
| case TK_VERSION: |
| etext = "keyword 'version'"; break; |
| case TK_NODE_ID: |
| etext = "keyword 'node-id'"; break; |
| case TK_CURRENT_UUID: |
| etext = "keyword 'current-uuid'"; break; |
| case TK_BITMAP_UUID: |
| etext = "keyword 'bitmap-uuid'"; break; |
| case TK_BITMAP_DAGTAG: |
| etext = "keyword 'bitmap-dagtag'"; break; |
| case TK_PEER: |
| etext = "keyword 'peer'"; break; |
| case TK_HASH: |
| etext = "keyword 'hash'"; break; |
| case TK_MAX_PEERS: |
| etext = "keyword 'max-peers'"; break; |
| case TK_NUM: |
| etext = "number ([0-9], up to 20 digits)"; break; |
| case TK_STRING: |
| etext = "short quoted string " |
| "(\"..up to 20 characters, no newline..\")"; |
| break; |
| case TK_U32: |
| etext = "an 8-digit hex number"; break; |
| case TK_U64: |
| etext = "a 16-digit hex number"; break; |
| } |
| } |
| fflush(stdout); |
| fprintf(stderr,"Parse error in line %u: %s%s", |
| yylineno, etext, |
| (etext[0] == ' ' ? ":" : " expected") |
| ); |
| |
| switch(seen_token) { |
| case 0: |
| fprintf(stderr, ", but end of file encountered\n"); break; |
| |
| case 1 ... 58: /* ord(';') == 58 */ |
| case 60 ... 122: /* ord('{') == 123 */ |
| case 124: /* ord('}') == 125 */ |
| case 126 ... 257: |
| /* oopsie. these should never be returned! */ |
| fprintf(stderr, "; got token value %u (this should never happen!)\n", seen_token); break; |
| break; |
| |
| case TK_INVALID_CHAR: |
| fprintf(stderr,"; got invalid input character '\\x%02x' [%c]\n", |
| (unsigned char)yylval.txt[0], yylval.txt[0]); |
| break; |
| case ';': case '{': case '}': |
| fprintf(stderr, ", not '%c'\n", seen_token); break; |
| case TK_NUM: |
| case TK_U32: |
| case TK_U64: |
| fprintf(stderr, ", not some number\n"); break; |
| case TK_INVALID: |
| /* already reported by scanner */ |
| fprintf(stderr,"\n"); break; |
| default: |
| fprintf(stderr, ", not '%s'\n", yylval.txt); |
| } |
| exit(10); |
| } |
| |
/* Fetch the next token from the scanner; anything other than
 * expected_token is reported through md_parse_error(). */
static void EXP(int expected_token)
{
	const int seen = yylex();

	if (seen == expected_token)
		return;

	md_parse_error(expected_token, seen, NULL);
}
| |
| static int assign_32_of_64bit(int i, uint64_t value, int max_peers) |
| { |
| le_u32 *bm = on_disk_buffer; |
| |
| if (i >= buffer_size / sizeof(*bm)) |
| return i; // Do no advance i after leaving the window |
| |
| if (i >= 0) { // only assign data, while within the window |
| if (((i / max_peers) & 1) == 0) |
| bm[i].le = cpu_to_le32((uint32_t) value); // little endian low word => lower address |
| else |
| bm[i].le = cpu_to_le32((uint32_t) (value >> 32)); |
| } |
| |
| return i + max_peers; |
| } |
| |
| int parse_bitmap_window_one_peer(struct format *cfg, int window, int peer_nr, int parse_only) |
| { |
| unsigned int max_peers = cfg->md.max_peers; |
| le_u32 *bm = on_disk_buffer; |
| uint64_t value; |
| int i, times; |
| |
| i = peer_nr - window * (buffer_size / sizeof(*bm)); |
| |
| if (format_version(cfg) < DRBD_V09) |
| EXP(TK_BM); |
| else { |
| EXP(TK_BITMAP); EXP('['); |
| EXP(TK_NUM); EXP(']'); |
| if (yylval.u64 != peer_nr) { |
| fprintf(stderr, "Parse error in line %u: " |
| "Expected peer slot %d but found %d\n", |
| yylineno, i, (int)yylval.u64); |
| exit(10); |
| } |
| } |
| EXP('{'); |
| |
| while(1) { |
| int tok = yylex(); |
| switch(tok) { |
| case TK_U64: |
| EXP(';'); |
| /* NOTE: |
| * even though this EXP(';'); already advanced |
| * to the next token, yylval will *not* be updated |
| * for * ';', so it is still valid. |
| * |
| * This seemed to be the least ugly way to implement a |
| * "parse_only" functionality without ugly if-branches |
| * or the maintenance nightmare of code duplication */ |
| if (parse_only) { |
| i += max_peers * (sizeof(value) / sizeof(*bm)); |
| break; |
| } |
| value = yylval.u64; |
| |
| i = assign_32_of_64bit(i, value, max_peers); |
| i = assign_32_of_64bit(i, value, max_peers); |
| break; |
| case TK_NUM: |
| times = yylval.u64; |
| EXP(TK_TIMES); |
| EXP(TK_U64); |
| EXP(';'); |
| if (parse_only) { |
| i += times * max_peers * (sizeof(value) / sizeof(*bm)); |
| break; |
| } |
| value = yylval.u64; |
| while(times--) { |
| i = assign_32_of_64bit(i, value, max_peers); |
| i = assign_32_of_64bit(i, value, max_peers); |
| } |
| break; |
| case '}': |
| goto break_loop; |
| default: |
| md_parse_error(0 /* ignored, since etext is set */, |
| tok, "repeat count, 16-digit hex number, or closing brace (})"); |
| goto break_loop; |
| } |
| } |
| break_loop: |
| |
| return i - peer_nr; |
| } |
| |
| int parse_bitmap_window(struct format *cfg, int window, int parse_only) |
| { |
| int words = 0, i; |
| |
| if (format_version(cfg) < DRBD_V09) { |
| return parse_bitmap_window_one_peer(cfg, window, 0, parse_only); |
| } else /* >= DRBD_V09 */ { |
| for (i = 0; i < cfg->md.max_peers; i++) { |
| words = parse_bitmap_window_one_peer(cfg, window, i, parse_only); |
| } |
| } |
| return words; |
| } |
| |
| void parse_bitmap(struct format *cfg, int parse_only) |
| { |
| le_u32 *bm = on_disk_buffer; |
| off_t bm_max_on_disk_off; |
| long start_pos; |
| int window = 0; |
| int words; |
| int truncated = 0; |
| |
| start_pos = ftell(yyin) - my_yy_unscaned_characters(); |
| |
| bm_max_on_disk_off = cfg->bm_offset + ALIGN(cfg->bm_bytes, 4096); |
| |
| do { |
| fseek(yyin, start_pos, SEEK_SET); |
| yyrestart(yyin); |
| |
| words = parse_bitmap_window(cfg, window, parse_only); |
| |
| if (words > 0 && !truncated) { |
| size_t s = words * sizeof(*bm); |
| size_t c; |
| |
| memset(bm + words, 0x00, buffer_size - s); |
| /* need to sector-align this for O_DIRECT. to be |
| * generic, maybe we even need to PAGE align it? */ |
| s = ALIGN(s, cfg->md_hard_sect_size); |
| if (parse_only) { |
| c = bm_max_on_disk_off - |
| (cfg->bm_offset + window * buffer_size); |
| if (c > s) |
| c = s; |
| } else |
| c = pwrite_with_limit_or_die(cfg, on_disk_buffer, |
| s, cfg->bm_offset + window * buffer_size, |
| bm_max_on_disk_off, |
| "meta_restore_md"); |
| if (s != c) { |
| fprintf(stderr, "Bitmap info too large, truncated!\n"); |
| /* If the bitmap info was truncated, there will |
| * be garbage, still, and the EXP(0) below would |
| * crap out. "Drain" that garbage here, |
| * while still checking for parse errors. |
| */ |
| truncated = 1; |
| } |
| } |
| |
| window++; |
| } while (words == buffer_size / sizeof(*bm)); |
| } |
| |
| int verify_dumpfile_or_restore(struct format *cfg, char **argv, int argc, int parse_only) |
| { |
| int old_max_peers = -1; |
| int new_max_peers = 1; |
| int i; |
| int err; |
| char slots_seen[DRBD_NODE_ID_MAX] = { 0, }; |
| int cur_slot; |
| |
| if (argc > 0) { |
| yyin = fopen(argv[0],"r"); |
| if(yyin == NULL) { |
| fprintf(stderr, "open of '%s' failed.\n",argv[0]); |
| exit(20); |
| } |
| } |
| |
| if (!parse_only) { |
| if (cfg->ops->open(cfg) != NO_VALID_MD_FOUND) { |
| old_max_peers = cfg->md.max_peers; |
| if (!confirmed("Valid meta-data in place, overwrite?")) |
| return -1; |
| } else { |
| ASSERT(!is_v06(cfg)); |
| } |
| } |
| |
| EXP(TK_VERSION); EXP(TK_STRING); |
| if(strcmp(yylval.txt,cfg->ops->name)) { |
| fprintf(stderr,"dump is '%s' you requested '%s'.\n", |
| yylval.txt,cfg->ops->name); |
| exit(10); |
| } |
| EXP(';'); |
| if (is_v09(cfg)) { |
| EXP(TK_MAX_PEERS); |
| EXP(TK_NUM); EXP(';'); |
| new_max_peers = yylval.u64; |
| } |
| |
| cfg->ops->md_initialize(cfg, 0, new_max_peers); |
| if (!parse_only) { |
| fprintf(stderr, "reinitializing\n"); |
| if (old_max_peers < new_max_peers && |
| cfg->md_index != DRBD_MD_INDEX_FLEX_INT) { |
| printf("Meta data needs more space now, since max_peers\n" |
| "is bigger than in existing meta_data. (%d -> %d)\n", |
| old_max_peers, new_max_peers); |
| } |
| |
| check_for_existing_data(cfg); |
| } |
| |
| |
| if (format_version(cfg) < DRBD_V08) { |
| EXP(TK_GC); EXP('{'); |
| for (i = 0; i < GEN_CNT_SIZE; i++) { |
| EXP(TK_NUM); EXP(';'); |
| cfg->md.gc[i] = yylval.u64; |
| } |
| EXP('}'); |
| } else { // >= 08 |
| if (is_v08(cfg)) { |
| EXP(TK_UUID); EXP('{'); |
| EXP(TK_U64); EXP(';'); |
| cfg->md.current_uuid = yylval.u64; |
| EXP(TK_U64); EXP(';'); |
| cfg->md.peers[0].bitmap_uuid = yylval.u64; |
| for (i = 0; i < HISTORY_UUIDS_V08; i++) { |
| EXP(TK_U64); EXP(';'); |
| cfg->md.history_uuids[i] = yylval.u64; |
| } |
| EXP(TK_FLAGS); EXP(TK_U32); EXP(';'); |
| cfg->md.flags = (uint32_t)yylval.u64; |
| EXP('}'); |
| } else /* >= 09 */ { |
| EXP(TK_NODE_ID); |
| EXP(TK_NUM); EXP(';'); |
| cfg->md.node_id = yylval.u64; |
| EXP(TK_CURRENT_UUID); |
| EXP(TK_U64); EXP(';'); |
| cfg->md.current_uuid = yylval.u64; |
| EXP(TK_FLAGS); EXP(TK_U32); EXP(';'); |
| cfg->md.flags = (uint32_t)yylval.u64; |
| |
| for (i = 0; i < DRBD_NODE_ID_MAX; i++) { |
| EXP(TK_PEER); EXP('['); |
| EXP(TK_NUM); EXP(']'); |
| cur_slot = yylval.u64; |
| if (cur_slot < 0 || cur_slot >= DRBD_NODE_ID_MAX) { |
| fprintf(stderr, "Parse error in line %u: " |
| "Slot %d out of range\n", |
| yylineno, cur_slot); |
| exit(10); |
| } |
| if (slots_seen[cur_slot]) { |
| fprintf(stderr, "Parse error in line %u: " |
| "Peer slot %d defined multiple times\n", |
| yylineno, cur_slot); |
| exit(10); |
| } |
| slots_seen[cur_slot] = 1; |
| EXP('{'); |
| EXP(TK_BITMAP_INDEX); |
| EXP(TK_NUM); EXP(';'); |
| cfg->md.peers[cur_slot].bitmap_index = yylval.u64; |
| EXP(TK_BITMAP_UUID); EXP(TK_U64); EXP(';'); |
| cfg->md.peers[cur_slot].bitmap_uuid = yylval.u64; |
| EXP(TK_BITMAP_DAGTAG); EXP(TK_U64); EXP(';'); |
| cfg->md.peers[cur_slot].bitmap_dagtag = yylval.u64; |
| EXP(TK_FLAGS); EXP(TK_U32); EXP(';'); |
| cfg->md.peers[cur_slot].flags = (uint32_t)yylval.u64; |
| EXP('}'); |
| } |
| EXP(TK_HISTORY_UUIDS); EXP('{'); |
| for (i = 0; i < ARRAY_SIZE(cfg->md.history_uuids); i++) { |
| EXP(TK_U64); EXP(';'); |
| cfg->md.history_uuids[i] = yylval.u64; |
| } |
| EXP('}'); |
| } |
| } |
| EXP(TK_LA_SIZE); EXP(TK_NUM); EXP(';'); |
| cfg->md.effective_size = yylval.u64; |
| if (format_version(cfg) >= DRBD_V08) { |
| EXP(TK_BM_BYTE_PER_BIT); EXP(TK_NUM); EXP(';'); |
| cfg->md.bm_bytes_per_bit = yylval.u64; |
| /* Check whether the value of bm_bytes_per_bit is |
| * a power-of-two multiple of 4k. */ |
| if (yylval.u64 < 4096 || (yylval.u64 & (yylval.u64 -1)) != 0) { |
| fprintf(stderr, "Invalid value for bm-byte-per-bit: " |
| "value must be a power-of-two multiple of 4096\n"); |
| exit(10); |
| } |
| EXP(TK_DEVICE_UUID); EXP(TK_U64); EXP(';'); |
| cfg->md.device_uuid = yylval.u64; |
| EXP(TK_LA_BIO_SIZE); EXP(TK_NUM); EXP(';'); |
| cfg->md.la_peer_max_bio_size = yylval.u64; |
| |
| EXP(TK_AL_STRIPES); EXP(TK_NUM); EXP(';'); |
| cfg->md.al_stripes = yylval.u64; |
| EXP(TK_AL_STRIPE_SIZE_4K); EXP(TK_NUM); EXP(';'); |
| cfg->md.al_stripe_size_4k = yylval.u64; |
| } else { |
| cfg->md.bm_bytes_per_bit = DEFAULT_BM_BLOCK_SIZE; |
| } |
| |
| if (option_al_stripes != cfg->md.al_stripes || |
| option_al_stripe_size_4k != cfg->md.al_stripe_size_4k) { |
| if (option_al_stripes_used) { |
| fprintf(stderr, "override activity log striping from commandline\n"); |
| cfg->md.al_stripes = option_al_stripes; |
| cfg->md.al_stripe_size_4k = option_al_stripe_size_4k; |
| } |
| if (verbose >= 2) |
| fprintf(stderr, "adjusting activity-log and bitmap offsets\n"); |
| re_initialize_md_offsets(cfg); |
| } |
| |
| clip_effective_size_and_bm_bytes(cfg); |
| parse_bitmap(cfg, parse_only); |
| |
| /* there should be no trailing garbage in the input file */ |
| EXP(0); |
| |
| if (parse_only) { |
| printf("input file parsed ok\n"); |
| return 0; |
| } |
| |
| err = cfg->ops->md_cpu_to_disk(cfg); |
| err = cfg->ops->close(cfg) || err; |
| if (err) { |
| fprintf(stderr, "Writing failed\n"); |
| return -1; |
| } |
| |
| printf("Successfully restored meta data\n"); |
| |
| return 0; |
| } |
| |
/* "restore-md" command: parse the dump file and write it to the meta data. */
int meta_restore_md(struct format *cfg, char **argv, int argc)
{
	const int parse_only = 0;

	return verify_dumpfile_or_restore(cfg, argv, argc, parse_only);
}
| |
/* "verify-dump" command: only parse the dump file, write nothing. */
int meta_verify_dump_file(struct format *cfg, char **argv, int argc)
{
	const int parse_only = 1;

	return verify_dumpfile_or_restore(cfg, argv, argc, parse_only);
}
| |
| void md_convert_07_to_08(struct format *cfg) |
| { |
| int i,j; |
| /* |
| * FIXME |
| * what about the UI_BITMAP, and the Activity Log? |
| * how to bring them over for internal meta data? |
| * |
| * maybe just refuse to convert anything that is not |
| * "clean"? how to detect that? |
| * |
| * FIXME: if I am a crashed R_PRIMARY, or D_INCONSISTENT, |
| * or Want-Full-Sync or the like, |
| * refuse, and indicate how to solve this */ |
| |
| printf("Converting meta data...\n"); |
| |
| //if (!cfg->bits_counted) count_bits(cfg); |
| /* FIXME: |
| * if this is "internal" meta data, and I have bits set, |
| * either move the bitmap into the newly expected place, |
| * or refuse, and indicate how to solve this */ |
| |
| /* KB <-> sectors is done in the md disk<->cpu functions. |
| * We only need to adjust the magic here. */ |
| cfg->md.magic = DRBD_MD_MAGIC_08; |
| |
| // The MDF Flags are (nearly) the same in 07 and 08 |
| cfg->md.flags = cfg->md.gc[Flags]; |
| |
| cfg->md.current_uuid = |
| (uint64_t)(cfg->md.gc[HumanCnt] & 0xffff) << 48 | |
| (uint64_t)(cfg->md.gc[TimeoutCnt] & 0xffff) << 32 | |
| (uint64_t)((cfg->md.gc[ConnectedCnt]+cfg->md.gc[ArbitraryCnt]) |
| & 0xffff) << 16 | |
| (uint64_t)0xbabe; |
| cfg->md.peers[0].bitmap_uuid = (uint64_t)0; |
| |
| for (i = cfg->bits_set ? UI_BITMAP : UI_HISTORY_START, j = 1; |
| i <= UI_HISTORY_END ; i++, j++) { |
| if (i == UI_BITMAP) |
| cfg->md.peers[0].bitmap_uuid = cfg->md.current_uuid - j*0x10000; |
| else |
| cfg->md.history_uuids[i - UI_HISTORY_START] = |
| cfg->md.current_uuid - j*0x10000; |
| } |
| |
| /* unconditionally re-initialize offsets, |
| * not necessary if fixed size external, |
| * necessary if flex external or internal */ |
| re_initialize_md_offsets(cfg); |
| |
| if (!is_valid_md(DRBD_V08, &cfg->md, cfg->md_index, cfg->bd_size)) { |
| fprintf(stderr, "Conversion failed.\nThis is a bug :(\n"); |
| exit(111); |
| } |
| } |
| |
| void md_convert_08_to_07(struct format *cfg) |
| { |
| /* |
| * FIXME |
| * what about the UI_BITMAP, and the Activity Log? |
| * how to bring them over for internal meta data? |
| * |
| * maybe just refuse to convert anything that is not |
| * "clean"? how to detect that? |
| * |
| * FIXME: if I am a crashed R_PRIMARY, or D_INCONSISTENT, |
| * or Want-Full-Sync or the like, |
| * refuse, and indicate how to solve this */ |
| |
| printf("Converting meta data...\n"); |
| //if (!cfg->bits_counted) count_bits(cfg); |
| /* FIXME: |
| * if this is "internal" meta data, and I have bits set, |
| * either move the bitmap into the newly expected place, |
| * or refuse, and indicate how to solve this */ |
| |
| /* KB <-> sectors is done in the md disk<->cpu functions. |
| * We only need to adjust the magic here. */ |
| cfg->md.magic = DRBD_MD_MAGIC_07; |
| |
| /* FIXME somehow generate GCs in a sane way */ |
| /* FIXME convert the flags? */ |
| printf("Conversion v08 -> v07 is BROKEN!\n" |
| "Be prepared to manually intervene!\n"); |
| /* FIXME put some more helpful text here, indicating what exactly is to |
| * be done to make this work as expected. */ |
| |
| /* unconditionally re-initialize offsets, |
| * not necessary if fixed size external, |
| * necessary if flex external or internal */ |
| re_initialize_md_offsets(cfg); |
| |
| if (!is_valid_md(DRBD_V07, &cfg->md, cfg->md_index, cfg->bd_size)) { |
| fprintf(stderr, "Conversion failed.\nThis is a bug :(\n"); |
| exit(111); |
| } |
| } |
| |
| void md_convert_08_to_09(struct format *cfg) |
| { |
| int p; |
| |
| for (p = 0; p < DRBD_NODE_ID_MAX; p++) { |
| cfg->md.peers[p].bitmap_uuid = 0; |
| cfg->md.peers[p].flags = 0; |
| cfg->md.peers[p].bitmap_index = -1; |
| } |
| |
| if (cfg->md.flags & MDF_CONNECTED_IND) |
| cfg->md.peers[0].flags |= MDF_PEER_CONNECTED; |
| |
| if (cfg->md.flags & MDF_FULL_SYNC) |
| cfg->md.peers[0].flags |= MDF_PEER_FULL_SYNC; |
| |
| if (cfg->md.flags & MDF_PEER_OUT_DATED) |
| cfg->md.peers[0].flags |= MDF_PEER_OUTDATED; |
| |
| cfg->md.flags &= ~(MDF_CONNECTED_IND | MDF_FULL_SYNC | MDF_PEER_OUT_DATED); |
| |
| cfg->md.node_id = -1; |
| cfg->md.magic = DRBD_MD_MAGIC_09; |
| re_initialize_md_offsets(cfg); |
| |
| if (!is_valid_md(DRBD_V09, &cfg->md, cfg->md_index, cfg->bd_size)) { |
| fprintf(stderr, "Conversion failed.\nThis is a bug :(\n"); |
| exit(111); |
| } |
| } |
| |
| void md_convert_09_to_08(struct format *cfg) |
| { |
| if (cfg->md.peers[0].flags & MDF_PEER_CONNECTED) |
| cfg->md.flags |= MDF_CONNECTED_IND; |
| |
| if (cfg->md.peers[0].flags & MDF_PEER_FULL_SYNC) |
| cfg->md.flags |= MDF_FULL_SYNC; |
| |
| if (cfg->md.peers[0].flags & MDF_PEER_OUTDATED) |
| cfg->md.flags |= MDF_PEER_OUT_DATED; |
| |
| cfg->md.magic = DRBD_MD_MAGIC_08; |
| cfg->md.max_peers = 1; |
| re_initialize_md_offsets(cfg); |
| |
| if (!is_valid_md(DRBD_V08, &cfg->md, cfg->md_index, cfg->bd_size)) { |
| fprintf(stderr, "Conversion failed.\nThis is a bug :(\n"); |
| exit(111); |
| } |
| } |
| |
| void convert_md(struct format *cfg, enum md_format from) |
| { |
| enum md_format to = format_version(cfg); |
| |
| switch(to) { |
| default: |
| case DRBD_UNKNOWN: |
| case DRBD_V06: |
| fprintf(stderr, "BUG in %s() %d.\n", __FUNCTION__, __LINE__); |
| exit(10); |
| case DRBD_V07: |
| switch(from) { |
| case DRBD_V09: |
| md_convert_09_to_08(cfg); |
| case DRBD_V08: |
| md_convert_08_to_07(cfg); |
| case DRBD_V07: |
| break; |
| case DRBD_V06: |
| case DRBD_UNKNOWN: |
| default: |
| fprintf(stderr, "BUG in %s() %d.\n", __FUNCTION__, __LINE__); |
| exit(10); |
| } |
| break; |
| case DRBD_V08: |
| switch(from) { |
| default: |
| case DRBD_UNKNOWN: |
| case DRBD_V06: |
| fprintf(stderr, "BUG in %s() %d.\n", __FUNCTION__, __LINE__); |
| exit(10); |
| case DRBD_V07: |
| md_convert_07_to_08(cfg); |
| case DRBD_V08: |
| break; |
| case DRBD_V09: |
| md_convert_09_to_08(cfg); |
| } |
| break; |
| case DRBD_V09: |
| switch(from) { |
| default: |
| case DRBD_UNKNOWN: |
| case DRBD_V06: |
| fprintf(stderr, "BUG in %s() %d.\n", __FUNCTION__, __LINE__); |
| exit(10); |
| case DRBD_V07: |
| md_convert_07_to_08(cfg); |
| case DRBD_V08: |
| md_convert_08_to_09(cfg); |
| case DRBD_V09: |
| ; |
| } |
| } |
| } |
| |
/* If we find data on the physical device that we can interpret,
 * print an informational message about what we found,
 * and how much room we think it needs.
 *
 * Look into /usr/share/misc/magic for inspiration;
 * also consider e.g. xfsprogs/libdisk/fstype.c,
 * and of course the linux kernel headers...
 */
struct fstype_s {
	/* human readable description of what was detected */
	const char * type;
	/* number of blocks the detected data claims to use */
	unsigned long long bnum;
	/* size of one such block, in bytes */
	unsigned long long bsize;
};
| |
| int may_be_extX(const char *data, struct fstype_s *f) |
| { |
| unsigned int size; |
| if (le16_to_cpu(*(uint16_t*)(data+0x438)) == 0xEF53) { |
| if ( (le32_to_cpu(*(data+0x45c)) & 4) == 4 ) |
| f->type = "ext3 filesystem"; |
| else |
| f->type = "ext2 filesystem"; |
| f->bnum = le32_to_cpu(*(uint32_t*)(data+0x404)); |
| size = le32_to_cpu(*(uint32_t*)(data+0x418)); |
| f->bsize = size == 0 ? 1024 : |
| size == 1 ? 2048 : |
| size == 2 ? 4096 : |
| 4096; /* DEFAULT */ |
| return 1; |
| } |
| return 0; |
| } |
| |
| int may_be_xfs(const char *data, struct fstype_s *f) |
| { |
| if (be32_to_cpu(*(uint32_t*)(data+0)) == 0x58465342) { |
| f->type = "xfs filesystem"; |
| f->bsize = be32_to_cpu(*(uint32_t*)(data+4)); |
| f->bnum = be64_to_cpu(*(uint64_t*)(data+8)); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int may_be_reiserfs(const char *data, struct fstype_s *f) |
| { |
| if (strncmp("ReIsErFs",data+0x10034,8) == 0 || |
| strncmp("ReIsEr2Fs",data+0x10034,9) == 0) { |
| f->type = "reiser filesystem"; |
| f->bnum = le32_to_cpu(*(uint32_t*)(data+0x10000)); |
| f->bsize = le16_to_cpu(*(uint16_t*)(data+0x1002c)); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int may_be_jfs(const char *data, struct fstype_s *f) |
| { |
| if (strncmp("JFS1",data+0x8000,4) == 0) { |
| f->type = "JFS filesystem"; |
| f->bnum = le64_to_cpu(*(uint64_t*)(data+0x8008)); |
| f->bsize = le32_to_cpu(*(uint32_t*)(data+0x8018)); |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* really large block size, |
| * will always refuse */ |
| #define REFUSE_BSIZE 0xFFFFffffFFFF0000LLU |
| #define ERR_BSIZE 0xFFFFffffFFFF0001LLU |
| #define REFUSE_IT() do { f->bnum = 1; f->bsize = REFUSE_BSIZE; } while(0) |
| #define REFUSE_IT_ERR() do { f->bnum = 1; f->bsize = ERR_BSIZE; } while(0) |
| int may_be_swap(const char *data, struct fstype_s *f) |
| { |
| int looks_like_swap = |
| strncmp(data+(1<<12)-10, "SWAP-SPACE", 10) == 0 || |
| strncmp(data+(1<<12)-10, "SWAPSPACE2", 10) == 0 || |
| strncmp(data+(1<<13)-10, "SWAP-SPACE", 10) == 0 || |
| strncmp(data+(1<<13)-10, "SWAPSPACE2", 10) == 0; |
| if (looks_like_swap) { |
| f->type = "swap space signature"; |
| REFUSE_IT(); |
| return 1; |
| } |
| return 0; |
| } |
| |
| #define N_ERR_LINES 4 |
| #define MAX_ERR_LINE_LEN 1024 |
| int guessed_size_from_pvs(struct fstype_s *f, char *dev_name) |
| { |
| char buf_in[200]; |
| char *buf_err[N_ERR_LINES]; |
| size_t c; |
| unsigned long long bnum; |
| int pipes[3][2]; |
| int err_lines = 0; |
| FILE *child_err = NULL; |
| int i; |
| int ret = 0; |
| pid_t pid; |
| |
| buf_err[0] = calloc(N_ERR_LINES, MAX_ERR_LINE_LEN); |
| if (!buf_err[0]) |
| return 0; |
| for (i = 1; i < N_ERR_LINES; i++) |
| buf_err[i] = buf_err[i-1] + MAX_ERR_LINE_LEN; |
| |
| for (i = 0; i < 3; i++) { |
| if (pipe(pipes[i])) |
| goto out; |
| } |
| |
| pid = fork(); |
| if (pid < 0) |
| goto out; |
| |
| setenv("dev_name", dev_name, 1); |
| if (pid == 0) { |
| /* child */ |
| char *argv[] = { |
| "sh", "-vxc", |
| "pvs -vvv --noheadings --nosuffix --units s -o pv_size" |
| " --config \"devices { write_cache_state=0 filter = [ 'a|$dev_name|', 'r|.|' ] }\"", |
| NULL, |
| }; |
| close(pipes[0][1]); /* close unused pipe ends */ |
| close(pipes[1][0]); |
| close(pipes[2][0]); |
| |
| dup2(pipes[0][0],0); /* map to expected stdin/out/err */ |
| dup2(pipes[1][1],1); |
| dup2(pipes[2][1],2); |
| |
| close(0); /* we do not use stdin */ |
| execvp(argv[0], argv); |
| _exit(0); |
| } |
| /* parent */ |
| close(pipes[0][0]); /* close unused pipe ends */ |
| close(pipes[1][1]); |
| close(pipes[2][1]); |
| |
| close(pipes[0][1]); /* we do not use stdin in child */ |
| |
| /* We use blocking IO on pipes. This could deadlock, |
| * If the child process would do something unexpected. |
| * We do know the behaviour of pvs, though, |
| * and expect only a few bytes on stdout, |
| * and quite a few debug messages on stderr. |
| * |
| * First drain stderr, keeping the last N_ERR_LINES, |
| * then read stdout. */ |
| child_err = fdopen(pipes[2][0], "r"); |
| if (child_err) { |
| char *b; |
| do { |
| err_lines = (err_lines + 1) % N_ERR_LINES; |
| b = fgets(buf_err[err_lines], MAX_ERR_LINE_LEN, child_err); |
| } while (b); |
| } |
| |
| c = read(pipes[1][0], buf_in, sizeof(buf_in)-1); |
| if (c > 0) { |
| buf_in[c] = 0; |
| if (1 == sscanf(buf_in, " %llu\n", &bnum)) { |
| f->bnum = bnum; |
| f->bsize = 512; |
| ret = 1; |
| } |
| } |
| if (!ret) { |
| for (i = 0; i < N_ERR_LINES; i++) { |
| char *b = buf_err[(err_lines + i) % N_ERR_LINES]; |
| if (b[0] == 0) |
| continue; |
| fprintf(stderr, "pvs stderr:%s", b); |
| } |
| fprintf(stderr, "\n"); |
| } |
| |
| i = 2; |
| out: |
| for ( ; i >= 0; i--) { |
| close(pipes[i][0]); |
| close(pipes[i][1]); |
| } |
| if (child_err) |
| fclose(child_err); |
| free(buf_err[0]); |
| return ret; |
| } |
| |
| int may_be_LVM(const char *data, struct fstype_s *f, char *dev_name) |
| { |
| if (strncmp("LVM2",data+0x218,4) == 0) { |
| f->type = "LVM2 physical volume signature"; |
| if (!guessed_size_from_pvs(f, dev_name)) |
| REFUSE_IT_ERR(); |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* XXX should all this output go to stderr? */ |
| void check_for_existing_data(struct format *cfg) |
| { |
| struct fstype_s f; |
| size_t i; |
| uint64_t fs_kB; |
| uint64_t max_usable_kB; |
| |
| PREAD(cfg, on_disk_buffer, SO_MUCH, 0); |
| |
| for (i = 0; i < SO_MUCH/sizeof(long); i++) { |
| if (((long*)(on_disk_buffer))[i] != 0LU) break; |
| } |
| /* all zeros? no message */ |
| if (i == SO_MUCH/sizeof(long)) return; |
| |
| f.type = "some data"; |
| f.bnum = 0; |
| f.bsize = 0; |
| |
| /* FIXME add more detection magic. |
| * Or, rather, use some lib. |
| */ |
| |
| (void)( |
| may_be_swap (on_disk_buffer,&f) || |
| may_be_LVM (on_disk_buffer,&f, cfg->md_device_name) || |
| |
| may_be_extX (on_disk_buffer,&f) || |
| may_be_xfs (on_disk_buffer,&f) || |
| may_be_jfs (on_disk_buffer,&f) || |
| may_be_reiserfs (on_disk_buffer,&f) |
| ); |
| |
| /* FIXME |
| * some of the messages below only make sense for internal meta data. |
| * for external meta data, we now only checked the meta-disk. |
| * we should still check the actual lower level storage area for |
| * existing data, too, and give appropriate warnings when it would |
| * appear to be truncated by too small external meta data */ |
| |
| printf("md_offset %llu\n", (long long unsigned)cfg->md_offset); |
| printf("al_offset %llu\n", (long long unsigned)cfg->al_offset); |
| printf("bm_offset %llu\n", (long long unsigned)cfg->bm_offset); |
| |
| printf("\nFound %s\n", f.type); |
| |
| /* FIXME overflow check missing! |
| * relevant for ln2(bsize) + ln2(bnum) >= 64, thus only for |
| * device sizes of more than several exa byte. |
| * seems irrelevant to me for now. |
| */ |
| fs_kB = ((f.bsize * f.bnum) + (1<<10)-1) >> 10; |
| max_usable_kB = max_usable_sectors(cfg) >> 1; |
| |
| if (f.bnum) { |
| if (cfg->md_index >= 0 || |
| cfg->md_index == DRBD_MD_INDEX_FLEX_EXT) { |
| printf("\nThis would corrupt existing data.\n"); |
| if (ignore_sanity_checks) { |
| printf("\nIgnoring sanity check on user request.\n\n"); |
| return; |
| } |
| printf( |
| "If you want me to do this, you need to zero out the first part\n" |
| "of the device (destroy the content).\n" |
| "You should be very sure that you mean it.\n" |
| "Operation refused.\n\n"); |
| exit(40); /* FIXME sane exit code! */ |
| } |
| |
| if (f.bsize < REFUSE_BSIZE) |
| printf("%12llu kB data area apparently used\n", (unsigned long long)fs_kB); |
| printf("%12llu kB left usable by current configuration\n", (unsigned long long)max_usable_kB); |
| |
| if (f.bsize == ERR_BSIZE) |
| printf( |
| "Could not determine the size of the actually used data area.\n\n"); |
| if (f.bsize >= REFUSE_BSIZE) { |
| printf( |
| "Device size would be truncated, which\n" |
| "would corrupt data and result in\n" |
| "'access beyond end of device' errors.\n"); |
| if (ignore_sanity_checks) { |
| printf("\nIgnoring sanity check on user request.\n\n"); |
| return; |
| } |
| printf( |
| "If you want me to do this, you need to zero out the first part\n" |
| "of the device (destroy the content).\n" |
| "You should be very sure that you mean it.\n" |
| "Operation refused.\n\n"); |
| exit(40); /* FIXME sane exit code! */ |
| } |
| |
| /* looks like file system data */ |
| if (fs_kB > max_usable_kB) { |
| printf( |
| "\nDevice size would be truncated, which\n" |
| "would corrupt data and result in\n" |
| "'access beyond end of device' errors.\n" |
| "You need to either\n" |
| " * use external meta data (recommended)\n" |
| " * shrink that filesystem first\n" |
| " * zero out the device (destroy the filesystem)\n" |
| "Operation refused.\n\n"); |
| exit(40); /* FIXME sane exit code! */ |
| } else { |
| printf( |
| "\nEven though it looks like this would place the new meta data into\n" |
| "unused space, you still need to confirm, as this is only a guess.\n"); |
| } |
| } else |
| printf("\n ==> This might destroy existing data! <==\n"); |
| |
| if (!confirmed("Do you want to proceed?")) { |
| printf("Operation canceled.\n"); |
| exit(1); // 1 to avoid online resource counting |
| } |
| } |
| |
| /* tries to guess what is in the on_disk_buffer */ |
| enum md_format detect_md(struct md_cpu *md, const uint64_t ll_size, int index_format) |
| { |
| struct md_cpu md_test; |
| enum md_format have = DRBD_UNKNOWN; |
| |
| md_disk_07_to_cpu(&md_test, (struct md_on_disk_07*)on_disk_buffer); |
| if (is_valid_md(DRBD_V07, &md_test, index_format, ll_size)) { |
| have = DRBD_V07; |
| *md = md_test; |
| } |
| |
| md_disk_08_to_cpu(&md_test, (struct md_on_disk_08*)on_disk_buffer); |
| if (is_valid_md(DRBD_V08, &md_test, index_format, ll_size)) { |
| have = DRBD_V08; |
| *md = md_test; |
| } |
| |
| md_disk_09_to_cpu(&md_test, (struct meta_data_on_disk_9*)on_disk_buffer); |
| if (is_valid_md(DRBD_V09, &md_test, index_format, ll_size)) { |
| have = DRBD_V09; |
| *md = md_test; |
| } |
| |
| return have; |
| } |
| |
| void check_internal_md_flavours(struct format * cfg) { |
| struct md_cpu md_now; |
| off_t fixed_offset, flex_offset; |
| enum md_format have = DRBD_UNKNOWN; |
| int fixed = 0; /* as opposed to flex */ |
| |
| ASSERT( cfg->md_index == DRBD_MD_INDEX_INTERNAL || |
| cfg->md_index == DRBD_MD_INDEX_FLEX_INT ); |
| |
| fixed_offset = v07_style_md_get_byte_offset( |
| DRBD_MD_INDEX_INTERNAL, cfg->bd_size); |
| flex_offset = v07_style_md_get_byte_offset( |
| DRBD_MD_INDEX_FLEX_INT, cfg->bd_size); |
| |
| /* printf("%lld\n%lld\n%lld\n", (long long unsigned)cfg->bd_size, |
| (long long unsigned)fixed_offset, (long long unsigned)flex_offset); */ |
| if (0 <= fixed_offset && fixed_offset < (off_t)cfg->bd_size - 4096) { |
| struct md_cpu md_test; |
| /* ... v07 fixed-size internal meta data? */ |
| PREAD(cfg, on_disk_buffer, 4096, fixed_offset); |
| |
| md_disk_07_to_cpu(&md_test, |
| (struct md_on_disk_07*)on_disk_buffer); |
| if (is_valid_md(DRBD_V07, &md_test, DRBD_MD_INDEX_INTERNAL, cfg->bd_size)) { |
| have = DRBD_V07; |
| fixed = 1; |
| md_now = md_test; |
| } |
| } |
| |
| if (have == DRBD_UNKNOWN) { |
| PREAD(cfg, on_disk_buffer, 4096, flex_offset); |
| have = detect_md(&md_now, cfg->bd_size, DRBD_MD_INDEX_FLEX_INT); |
| } |
| |
| if (have == DRBD_UNKNOWN) |
| return; |
| |
| fprintf(stderr, "You want me to create a %s%s style %s internal meta data block.\n", |
| cfg->ops->name, |
| (is_v07(cfg) && cfg->md_index == DRBD_MD_INDEX_FLEX_INT) ? "(plus)" : "", |
| cfg->md_index == DRBD_MD_INDEX_FLEX_INT ? "flexible-size" : "fixed-size"); |
| |
| |
| fprintf(stderr, "There appears to be a %s %s internal meta data block\n" |
| "already in place on %s at byte offset %llu\n", |
| f_ops[have].name, fixed ? "fixed-size" : "flexible-size", |
| cfg->md_device_name, |
| fixed ? (long long unsigned)fixed_offset : (long long unsigned)flex_offset); |
| |
| if (format_version(cfg) == have) { |
| if (have != DRBD_V07 |
| && (cfg->md.al_stripes != option_al_stripes |
| || cfg->md.al_stripe_size_4k != option_al_stripe_size_4k)) { |
| if (confirmed("Do you want to change the activity log stripe settings *only*?")) { |
| fprintf(stderr, |
| "Sorry, not yet fully implemented\n" |
| "Try dump-md > dump.txt; restore-md -s x -z y dump.txt\n"); |
| exit(30); |
| /* |
| * ??? |
| * cfg->md.al_stripes = option_al_stripes; |
| * cfg->md.al_stripe_size_4k = option_al_stripe_size_4k; |
| * re_initialize_md_offsets(cfg); |
| * return; |
| * ??? |
| */ |
| } |
| } |
| if (!confirmed("Do you really want to overwrite the existing meta-data?")) { |
| printf("Operation cancelled.\n"); |
| exit(1); // 1 to avoid online resource counting |
| } |
| cfg->md.magic = 0; |
| } else { |
| char msg[160]; |
| |
| snprintf(msg, 160, "Valid %s meta-data found, convert to %s?", |
| f_ops[have].name, cfg->ops->name); |
| if (confirmed(msg)) { |
| cfg->md = md_now; |
| convert_md(cfg, have); |
| } else { |
| snprintf(msg, 160, "So you want me to replace the %s meta-data\n" |
| "with newly initialized %s meta-data?", |
| f_ops[have].name, cfg->ops->name); |
| if (!confirmed(msg)) { |
| printf("Operation cancelled.\n"); |
| exit(1); // 1 to avoid online resource counting |
| } |
| cfg->md.magic = 0; |
| } |
| } |
| |
| /* we have two "internal" layouts: |
| * v07 "fixed" internal: |
| * | data .... |MD super block |AL | bitmap | |
| * v07 "plus", v08, v09 "flexible" internal: |
| * | data .... | bitmap |AL |MD super block | |
| * If we change from one layout to the other, |
| * we want to wipe the former MD super block |
| * after successful conversion. |
| */ |
| /* we convert from v07 "fixed" to flexible internal, we wipe the "fixed" offset */ |
| if (have == DRBD_V07 && fixed && cfg->md_index == DRBD_MD_INDEX_FLEX_INT) |
| cfg->wipe_fixed = fixed_offset; |
| /* we convert from "flexible" to v07 fixed, we wipe the "flexible" offset */ |
| else if ((have != DRBD_V07 || fixed == 0) && (is_v07(cfg) && cfg->md_index == DRBD_MD_INDEX_INTERNAL)) |
| cfg->wipe_flex = flex_offset; |
| } |
| |
| void wipe_after_convert(struct format *cfg) |
| { |
| memset(on_disk_buffer, 0x00, 4096); |
| if (cfg->wipe_fixed) |
| pwrite_or_die(cfg, on_disk_buffer, 4096, cfg->wipe_fixed, |
| "wipe fixed-size v07 internal md"); |
| if (cfg->wipe_flex) |
| pwrite_or_die(cfg, on_disk_buffer, 4096, cfg->wipe_flex, |
| "wipe flexible-size internal md"); |
| } |
| |
| void check_external_md_flavours(struct format * cfg) { |
| struct md_cpu md_now; |
| enum md_format have = DRBD_UNKNOWN; |
| char msg[160]; |
| |
| ASSERT( cfg->md_index >= 0 || |
| cfg->md_index == DRBD_MD_INDEX_FLEX_EXT ); |
| |
| if (cfg->md.magic) { |
| if (!confirmed("Valid meta data seems to be in place.\n" |
| "Do you really want to overwrite?")) { |
| printf("Operation cancelled.\n"); |
| exit(1); |
| } |
| cfg->md.magic = 0; |
| return; |
| } |
| |
| PREAD(cfg, on_disk_buffer, 4096, cfg->md_offset); |
| have = detect_md(&md_now, cfg->bd_size, DRBD_MD_INDEX_FLEX_EXT); |
| |
| if (have == DRBD_UNKNOWN) |
| return; |
| |
| snprintf(msg, 160, "Valid %s meta-data found, convert to %s?", |
| f_ops[have].name, cfg->ops->name); |
| if (confirmed(msg)) { |
| cfg->md = md_now; |
| convert_md(cfg, have); |
| } else { |
| snprintf(msg, 160, "So you want me to replace the %s meta-data\n" |
| "with newly initialized %s meta-data?", |
| f_ops[have].name, cfg->ops->name); |
| if (confirmed(msg)) { |
| cfg->md.magic = 0; |
| return; |
| } |
| |
| printf("Operation cancelled.\n"); |
| exit(1); |
| } |
| } |
| |
| /* ok, so there is no valid meta data at the end of the device, |
| * but there is valid internal meta data at the "last known" |
| * position. Move the stuff. |
| * Areas may overlap: |
| * |--...~//~[BITMAP][AL][SB]| <<- last known |
| * |--.......~//~[BITMAP][AL][SB]| <<- what it should look like now |
| * So we move it in chunks. |
| */ |
| int v08_move_internal_md_after_resize(struct format *cfg) |
| { |
| struct md_cpu md_old; |
| off_t old_offset; |
| off_t old_bm_offset; |
| off_t cur_offset; |
| off_t last_chunk_size; |
| int err; |
| |
| ASSERT(format_version(cfg) >= DRBD_V08); |
| ASSERT(cfg->md_index == DRBD_MD_INDEX_FLEX_INT); |
| ASSERT(cfg->lk_bd.bd_size <= cfg->bd_size); |
| |
| /* we just read it in v08_check_for_resize(). |
| * no need to do it again, but ASSERT this. */ |
| md_old = cfg->md; |
| ASSERT(is_valid_md(format_version(cfg), &md_old, DRBD_MD_INDEX_FLEX_INT, cfg->lk_bd.bd_size)); |
| old_offset = v07_style_md_get_byte_offset(DRBD_MD_INDEX_FLEX_INT, cfg->lk_bd.bd_size); |
| |
| /* fix AL and bitmap offsets, populate byte offsets for the new location */ |
| re_initialize_md_offsets(cfg); |
| |
| fprintf(stderr, "Moving the internal meta data to its proper location\n"); |
| |
| if (verbose >= 2) { |
| fprintf(stderr,"old md_offset: "U64"\n", old_offset); |
| fprintf(stderr,"old al_offset: %llu (%d)\n", old_offset + md_old.al_offset * 512LL, md_old.al_offset); |
| fprintf(stderr,"old bm_offset: %llu (%d)\n", old_offset + md_old.bm_offset * 512LL, md_old.bm_offset); |
| |
| fprintf(stderr,"new md_offset: "U64"\n", cfg->md_offset); |
| fprintf(stderr,"new al_offset: "U64" (%d)\n", cfg->al_offset, cfg->md.al_offset); |
| fprintf(stderr,"new bm_offset: "U64" (%d)\n", cfg->bm_offset, cfg->md.bm_offset); |
| |
| fprintf(stderr,"md_size_sect: "U32"\n", cfg->md.md_size_sect); |
| fprintf(stderr,"max_usable_sect: "U64"\n", cfg->max_usable_sect); |
| } |
| |
| /* FIXME |
| * If the new meta data area overlaps the old "super block", |
| * and we crash before we successfully wrote the new super block, |
| * but after we overwrote the old, we are out of luck! |
| * But I don't want to write the new superblock early, either. |
| */ |
| |
| /* move activity log, fixed size immediately preceeding the "super block". */ |
| cur_offset = old_offset + md_old.al_offset * 512LL; |
| PREAD(cfg, on_disk_buffer, old_offset - cur_offset, cur_offset); |
| PWRITE(cfg, on_disk_buffer, old_offset - cur_offset, cfg->al_offset); |
| |
| /* The AL was of fixed size. |
| * Bitmap is of flexible size, new bitmap is likely larger. |
| * We do not initialize that part, we just leave "garbage" in there. |
| * Once DRBD "agrees" on the new lower level device size, that part of |
| * the bitmap will be handled by the module, anyways. */ |
| old_bm_offset = old_offset + cfg->md.bm_offset * 512LL; |
| |
| /* move bitmap, in chunks, peel off from the end. */ |
| cur_offset = old_offset + cfg->md.al_offset * 512LL - buffer_size; |
| while (cur_offset > old_bm_offset) { |
| PREAD(cfg, on_disk_buffer, buffer_size, cur_offset); |
| PWRITE(cfg, on_disk_buffer, buffer_size, |
| cfg->bm_offset + (cur_offset - old_bm_offset)); |
| cur_offset -= buffer_size; |
| } |
| |
| /* Adjust for last, possibly partial buffer. */ |
| last_chunk_size = buffer_size - (old_bm_offset - cur_offset); |
| PREAD(cfg, on_disk_buffer, last_chunk_size, old_bm_offset); |
| PWRITE(cfg, on_disk_buffer, last_chunk_size, cfg->bm_offset); |
| |
| /* fix bitmap offset in meta data, |
| * and rewrite the "super block" */ |
| re_initialize_md_offsets(cfg); |
| |
| err = cfg->ops->md_cpu_to_disk(cfg); |
| |
| if (!err) |
| printf("Internal drbd meta data successfully moved.\n"); |
| |
| if (!err && old_offset < cfg->bm_offset) { |
| /* wipe out previous meta data block, it has been superseded. */ |
| cfg->wipe_resize = old_offset; |
| memset(on_disk_buffer, 0, 4096); |
| PWRITE(cfg, on_disk_buffer, 4096, old_offset); |
| } |
| |
| err = cfg->ops->close(cfg) || err; |
| if (err) |
| fprintf(stderr, "operation failed\n"); |
| |
| return err; |
| } |
| |
| int meta_create_md(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| int err = 0; |
| int max_peers = 1; |
| |
| if (is_v09(cfg)) { |
| if (argc < 1) { |
| fprintf(stderr, "USAGE: %s MINOR v09 ... create-md MAX_PEERS\n" |
| "\n" |
| " MAX_PEERS argument missing\n", progname); |
| exit(20); |
| } else if (argc > 1) |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| |
| max_peers = m_strtoll(argv[0], 1); |
| } else if (argc > 0) |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| |
| if (max_peers < 1 || max_peers > DRBD_PEERS_MAX) { |
| fprintf(stderr, "MAX_PEERS argument not in allowed range 1 .. %d.\n", DRBD_PEERS_MAX); |
| exit(20); |
| } |
| |
| err = cfg->ops->open(cfg); |
| |
| /* Suggest to move existing meta data after offline resize. Though, if |
| * you --force create-md, you probably mean it, so we don't even ask. |
| * If you want to automatically move it, use check-resize. |
| */ |
| if (err == VALID_MD_FOUND_AT_LAST_KNOWN_LOCATION) { |
| if (option_al_stripes_used) { |
| if (option_al_stripes != cfg->md.al_stripes |
| || option_al_stripe_size_4k != cfg->md.al_stripe_size_4k) { |
| fprintf(stderr, "Cannot move after offline resize and change AL-striping at the same time, yet.\n"); |
| exit(20); |
| } |
| } |
| if (!force && |
| confirmed("Move internal meta data from last-known position?\n")) { |
| /* Maybe we want to use some library that provides detection of |
| * fs/partition/usage types? */ |
| check_for_existing_data(cfg); |
| return v08_move_internal_md_after_resize(cfg); |
| } |
| /* else: reset cfg->md, it needs to be re-initialized below */ |
| memset(&cfg->md, 0, sizeof(cfg->md)); |
| } |
| |
| /* the offset of v07 fixed-size internal meta data is different from |
| * the offset of the flexible-size v07 ("plus") and v08 (default) |
| * internal meta data. |
| * to avoid the situation where we would have "valid" meta data blocks |
| * of different versions at different offsets, we also need to check |
| * the other format, and the other offset. |
| * |
| * on a request to create v07 fixed-size internal meta data, we also |
| * check flex-internal v08 [and v07 (plus)] at the other offset. |
| * |
| * on a request to create v08 flex-internal meta data (or v07 plus, for |
| * that matter), we also check the same offset for the respective other |
| * flex-internal format version, as well as the v07 fixed-size internal |
| * meta data offset for its flavor of meta data. |
| */ |
| if (cfg->md_index == DRBD_MD_INDEX_INTERNAL || |
| cfg->md_index == DRBD_MD_INDEX_FLEX_INT) |
| check_internal_md_flavours(cfg); |
| else |
| check_external_md_flavours(cfg); |
| |
| if (!cfg->md.magic) /* not converted: initialize */ |
| /* calls check_for_existing_data() internally */ |
| err = cfg->ops->md_initialize(cfg, 1, max_peers); /* Clears on disk AL implicitly */ |
| else { |
| if (format_version(cfg) >= DRBD_V09 && max_peers != 1) |
| printf("Warning: setting max_peers to 1 instead of %d\n\n", |
| max_peers); |
| err = 0; /* we have sucessfully converted somthing */ |
| |
| check_for_existing_data(cfg); |
| } |
| |
| cfg->md.la_peer_max_bio_size = option_peer_max_bio_size; |
| |
| /* FIXME |
| * if this converted fixed-size 128MB internal meta data |
| * to flexible size, we'd need to move the AL and bitmap |
| * over to the new location! |
| * But the upgrade procedure in such case is documented to first get |
| * the previous DRBD into "clean" L_ESTABLISHED R_SECONDARY/R_SECONDARY, so AL |
| * and bitmap should be empty anyways. |
| */ |
| printf("Writing meta data...\n"); |
| err = err || cfg->ops->md_cpu_to_disk(cfg); // <- short circuit |
| if (!err) |
| wipe_after_convert(cfg); |
| err = cfg->ops->close(cfg) || err; // <- close always |
| if (err) |
| fprintf(stderr, "operation failed\n"); |
| else |
| printf("New drbd meta data block successfully created.\n"); |
| |
| return err; |
| } |
| |
| int meta_wipe_md(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| int virgin, err; |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| virgin = cfg->ops->open(cfg); |
| if (virgin) { |
| fprintf(stderr,"There appears to be no drbd meta data to wipe out?\n"); |
| return 0; |
| } |
| |
| if (!confirmed("Do you really want to wipe out the DRBD meta data?")) { |
| printf("Operation cancelled.\n"); |
| exit(1); |
| } |
| |
| printf("Wiping meta data...\n"); |
| memset(on_disk_buffer, 0, 4096); |
| PWRITE(cfg, on_disk_buffer, 4096, cfg->md_offset); |
| |
| err = cfg->ops->close(cfg); |
| if (err) |
| fprintf(stderr, "operation failed\n"); |
| else |
| printf("DRBD meta data block successfully wiped out.\n"); |
| |
| /* delete last-known bdev info, it is of no use now. */ |
| lk_bdev_delete(cfg->minor); |
| |
| return err; |
| } |
| |
| int meta_outdate(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| int err; |
| |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| if (cfg->ops->open(cfg)) |
| return -1; |
| |
| if (cfg->ops->outdate_gi(&cfg->md)) { |
| fprintf(stderr, "Device is inconsistent.\n"); |
| exit(5); |
| } |
| |
| err = cfg->ops->md_cpu_to_disk(cfg); |
| err = cfg->ops->close(cfg) || err; // <- close always |
| if (err) |
| fprintf(stderr, "update failed\n"); |
| |
| return err; |
| } |
| |
| int meta_invalidate(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| int err; |
| |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| if (cfg->ops->open(cfg)) |
| return -1; |
| |
| cfg->ops->invalidate_gi(&cfg->md); |
| err = cfg->ops->md_cpu_to_disk(cfg); |
| err = cfg->ops->close(cfg) || err; // <- close always |
| if (err) |
| fprintf(stderr, "update failed\n"); |
| |
| return err; |
| } |
| |
| int meta_read_dev_uuid(struct format *cfg, char **argv __attribute((unused)), int argc) |
| { |
| if (argc > 0) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| |
| if (cfg->ops->open(cfg)) |
| return -1; |
| |
| printf(X64(016)"\n",cfg->md.device_uuid); |
| |
| return cfg->ops->close(cfg); |
| } |
| |
| int meta_write_dev_uuid(struct format *cfg, char **argv, int argc) |
| { |
| int err; |
| |
| if (argc > 1) { |
| fprintf(stderr, "Ignoring additional arguments\n"); |
| } |
| if (argc < 1) { |
| fprintf(stderr, "Required Argument missing\n"); |
| exit(10); |
| } |
| |
| if (cfg->ops->open(cfg)) |
| return -1; |
| |
| cfg->md.device_uuid = strto_u64(argv[0],NULL,16); |
| |
| err = cfg->ops->md_cpu_to_disk(cfg); |
| err = cfg->ops->close(cfg) || err; |
| if (err) |
| fprintf(stderr, "update failed\n"); |
| |
| return err; |
| } |
| |
| void print_usage_and_exit() |
| { |
| char **args; |
| size_t i; |
| |
| printf |
| ("\nUSAGE: %s [--force] DEVICE FORMAT [FORMAT ARGS...] COMMAND [CMD ARGS...]\n", |
| progname); |
| |
| printf("\nFORMATS:\n"); |
| for (i = DRBD_V06; i < DRBD_UNKNOWN; i++) { |
| printf(" %s", f_ops[i].name); |
| if ((args = f_ops[i].args)) { |
| while (*args) { |
| printf(" %s", *args++); |
| } |
| } |
| printf("\n"); |
| } |
| |
| printf("\nCOMMANDS:\n"); |
| for (i = 0; i < ARRAY_SIZE(cmds); i++) { |
| if (!cmds[i].show_in_usage) |
| continue; |
| printf(" %s%s %s\n", cmds[i].name, |
| cmds[i].node_id_required ? " --node-id {val}" : "", |
| cmds[i].args ? cmds[i].args : ""); |
| } |
| |
| exit(20); |
| } |
| |
| int parse_format(struct format *cfg, char **argv, int argc, int *ai) |
| { |
| enum md_format f; |
| |
| if (argc < 1) { |
| fprintf(stderr, "Format identifier missing\n"); |
| return -1; |
| } |
| |
| for (f = DRBD_V06; f < DRBD_UNKNOWN; f++) { |
| if (!strcmp(f_ops[f].name, argv[0])) |
| break; |
| } |
| if (f == DRBD_UNKNOWN) { |
| fprintf(stderr, "Unknown format '%s'.\n", argv[0]); |
| return -1; |
| } |
| |
| (*ai)++; |
| |
| cfg->ops = f_ops + f; |
| return cfg->ops->parse(cfg, argv + 1, argc - 1, ai); |
| } |
| |
| |
| static enum drbd_disk_state drbd_str_disk(const char *str) |
| { |
| /* drbd 8.4 and earlier provide "Local/Remote" |
| * drbd 9. only "Local". */ |
| const char *slash = strchr(str, '/'); |
| size_t len; |
| int n; |
| |
| if (slash) |
| len = slash - str; |
| else |
| len = strlen(str); |
| |
| for (n = 0; n < drbd_disk_state_names.size; n++) { |
| if (drbd_disk_state_names.names[n] && |
| !strncmp(str, drbd_disk_state_names.names[n], len)) |
| return (enum drbd_disk_state)n; |
| } |
| if (!strcmp(str, "Unconfigured")) |
| return D_DISKLESS; |
| |
| fprintf(stderr, "Unexpected output from drbdsetup >%s<\n", str); |
| exit(20); |
| } |
| |
| |
| int is_attached(int minor) |
| { |
| char minor_string[7], result[40]; |
| char *argv[] = { "drbdsetup", minor_string, "dstate", NULL }; |
| int pipes[2]; |
| pid_t pid; |
| int rr, exitcode; |
| |
| if (pipe(pipes)) { |
| perror("drbdsetup pipe"); |
| exit(20); |
| } |
| |
| snprintf(minor_string, ARRAY_SIZE(minor_string), "%d", minor); |
| |
| pid = fork(); |
| if (pid == -1) { |
| perror("fork for drbdsetup"); |
| exit(20); |
| } |
| if (pid == 0) { |
| FILE *f = freopen("/dev/null", "w", stderr); |
| if (!f) |
| fprintf(stderr, "freopen(/dev/null) failed\n"); |
| |
| close(pipes[0]); |
| dup2(pipes[1], 1); |
| |
| execvp(argv[0], argv); |
| fprintf(stderr, "Can not exec drbdsetup\n"); |
| exit(20); |
| } |
| close(pipes[1]); |
| |
| rr = read(pipes[0], result, ARRAY_SIZE(result)); |
| close(pipes[0]); |
| waitpid(pid, &exitcode, 0); |
| if (WEXITSTATUS(exitcode) == 20 || WEXITSTATUS(exitcode) == 10) |
| return 0; /* 20 == no module; 10 == no minor */ |
| |
| if (rr < 1) { |
| perror("read from drbdsetup\n"); |
| exit(20); |
| } |
| result[rr-1] = 0; |
| |
| return drbd_str_disk(result) > D_DISKLESS ? 1 : 0; |
| } |
| |
| int meta_chk_offline_resize(struct format *cfg, char **argv, int argc) |
| { |
| int err; |
| |
| err = cfg->ops->open(cfg); |
| |
| /* this is first, so that lk-bdev-info files are removed/updated |
| * if we find valid meta data in the expected place. */ |
| if (err == VALID_MD_FOUND) { |
| /* Do not clutter the output of the init script |
| printf("Found valid meta data in the expected location, %llu bytes into %s.\n", |
| (unsigned long long)cfg->md_offset, cfg->md_device_name); |
| */ |
| /* create, delete or update the last known info */ |
| if (lk_bdev_load(cfg->minor, &cfg->lk_bd) < 0) |
| return -1; |
| if (cfg->md_index != DRBD_MD_INDEX_FLEX_INT) |
| lk_bdev_delete(cfg->minor); |
| else if (cfg->lk_bd.bd_size != cfg->bd_size || |
| cfg->lk_bd.bd_uuid != cfg->md.device_uuid) |
| cfg->update_lk_bdev = 1; |
| return cfg->ops->close(cfg); |
| } else if (err == NO_VALID_MD_FOUND) { |
| if (format_version(cfg) < DRBD_V08 || cfg->md_index != DRBD_MD_INDEX_FLEX_INT) { |
| fprintf(stderr, "Operation only supported for >= v8 internal meta data\n"); |
| return -1; |
| } |
| fprintf(stderr, "no suitable meta data found :(\n"); |
| return -1; /* sorry :( */ |
| } |
| /* VALID_MD_FOUND_AT_LAST_KNOWN_LOCATION */ |
| |
| ASSERT(format_version(cfg) >= DRBD_V08); |
| ASSERT(cfg->md_index == DRBD_MD_INDEX_FLEX_INT); |
| ASSERT(cfg->lk_bd.bd_size); |
| ASSERT(cfg->md.magic); |
| |
| return v08_move_internal_md_after_resize(cfg); |
| } |
| |
| int meta_forget_peer(struct format *cfg, char **argv, int argc) |
| { |
| int err; |
| |
| err = cfg->ops->open(cfg); |
| if (err) |
| return -1; |
| |
| cfg->md.peers[option_node_id].bitmap_index = -1; |
| cfg->md.peers[option_node_id].bitmap_uuid = 0; |
| cfg->md.peers[option_node_id].flags = 0; |
| |
| cfg->ops->md_cpu_to_disk(cfg); |
| err = cfg->ops->close(cfg) || err; |
| if (err) |
| fprintf(stderr, "update failed\n"); |
| |
| return err; |
| } |
| |
| /* CALL ONLY ONCE as long as on_disk_buffer is global! */ |
| struct format *new_cfg() |
| { |
| int err; |
| struct format *cfg; |
| |
| errno = 0; |
| pagesize = sysconf(_SC_PAGESIZE); |
| if (errno) { |
| perror("could not determine pagesize"); |
| exit(20); |
| } |
| cfg = calloc(1, sizeof(struct format)); |
| if (!cfg) { |
| fprintf(stderr, "could not calloc() cfg\n"); |
| exit(20); |
| } |
| err = posix_memalign(&on_disk_buffer, pagesize, ALIGN(buffer_size, pagesize)); |
| if (err) { |
| fprintf(stderr, "could not posix_memalign() on_disk_buffer\n"); |
| exit(20); |
| } |
| return cfg; |
| } |
| |
| int main(int argc, char **argv) |
| { |
| struct format *cfg; |
| size_t i; |
| int ai, rv; |
| bool minor_attached = false; |
| |
| #if 1 |
| if (sizeof(struct md_on_disk_07) != 4096) { |
| fprintf(stderr, "Where did you get this broken build!?\n" |
| "sizeof(md_on_disk_07) == %lu, should be 4096\n", |
| (unsigned long)sizeof(struct md_on_disk_07)); |
| exit(111); |
| } |
| if (sizeof(struct md_on_disk_08) != 4096) { |
| fprintf(stderr, "Where did you get this broken build!?\n" |
| "sizeof(md_on_disk_08) == %lu, should be 4096\n", |
| (unsigned long)sizeof(struct md_on_disk_08)); |
| exit(111); |
| } |
| if (sizeof(struct meta_data_on_disk_9) != 4096) { |
| fprintf(stderr, "Where did you get this broken build!?\n" |
| "sizeof(meta_data_on_disk_9) == %lu, should be 4096\n", |
| (unsigned long)sizeof(struct meta_data_on_disk_9)); |
| exit(111); |
| } |
| #if 0 |
| printf("v07: al_offset: %u\n", (int)&(((struct md_on_disk_07*)0)->al_offset)); |
| printf("v07: bm_offset: %u\n", (int)&(((struct md_on_disk_07*)0)->bm_offset)); |
| printf("v08: al_offset: %u\n", (int)&(((struct md_on_disk_08*)0)->al_offset)); |
| printf("v08: bm_offset: %u\n", (int)&(((struct md_on_disk_08*)0)->bm_offset)); |
| exit(0); |
| #endif |
| #endif |
| |
| if ((progname = strrchr(argv[0], '/'))) { |
| argv[0] = ++progname; |
| } else { |
| progname = argv[0]; |
| } |
| |
| if (argc < 4) |
| print_usage_and_exit(); |
| |
| /* so dump_md can write a nice header */ |
| global_argc = argc; |
| global_argv = argv; |
| |
| /* Check for options (e.g. --force) */ |
| while (1) { |
| int c = getopt_long(argc, argv, make_optstring(metaopt), metaopt, 0); |
| |
| if (c == -1) |
| break; |
| |
| switch (c) { |
| case 0: |
| break; |
| case 'f': |
| force = 1; |
| break; |
| case 'v': |
| verbose++; |
| break; |
| case 'p': |
| option_peer_max_bio_size = m_strtoll(optarg, 1); |
| if (option_peer_max_bio_size < 0 || |
| option_peer_max_bio_size > 1024 * 1024) { |
| fprintf(stderr, "peer-max-bio-size out of range (0...1M)\n"); |
| exit(10); |
| } |
| break; |
| case 'i': |
| option_node_id = m_strtoll(optarg, 1); |
| if (option_node_id < 0 || option_node_id > (DRBD_PEERS_MAX - 1)) { |
| fprintf(stderr, "node-id out of range (0...%d)\n", DRBD_PEERS_MAX - 1); |
| exit(10); |
| } |
| break; |
| case 's': |
| option_al_stripes = m_strtoll(optarg, 1); |
| option_al_stripes_used = 1; |
| break; |
| case 'z': |
| option_al_stripe_size_4k = m_strtoll(optarg, 'k')/4; |
| option_al_stripes_used = 1; |
| break; |
| default: |
| print_usage_and_exit(); |
| break; |
| } |
| } |
| |
| // Next argument to process is specified by optind... |
| ai = optind; |
| |
| cfg = new_cfg(); |
| cfg->drbd_dev_name = argv[ai++]; |
| |
| if (parse_format(cfg, argv + ai, argc - ai, &ai)) { |
| /* parse has already printed some error message */ |
| exit(20); |
| } |
| |
| if (ai >= argc) { |
| fprintf(stderr, "command missing\n"); |
| exit(20); |
| } |
| |
| for (i = 0; i < ARRAY_SIZE(cmds); i++) { |
| if (!strcmp(cmds[i].name, argv[ai])) { |
| command = cmds + i; |
| break; |
| } |
| } |
| if (command == NULL) { |
| fprintf(stderr, "Unknown command '%s'.\n", argv[ai]); |
| exit(20); |
| } |
| ai++; |
| |
| /* does exit() unless we acquired the lock. |
| * unlock happens implicitly when the process dies, |
| * but may be requested implicitly |
| */ |
| if (strcmp(cfg->drbd_dev_name, "-")) { |
| cfg->minor = dt_minor_of_dev(cfg->drbd_dev_name); |
| if (cfg->minor < 0) { |
| fprintf(stderr, "Cannot determine minor device number of " |
| "drbd device '%s'", |
| cfg->drbd_dev_name); |
| exit(20); |
| } |
| cfg->lock_fd = dt_lock_drbd(cfg->minor); |
| |
| /* check whether this is in use */ |
| minor_attached = is_attached(cfg->minor); |
| if (minor_attached && command->modifies_md) { |
| fprintf(stderr, "Device '%s' is configured!\n", |
| cfg->drbd_dev_name); |
| exit(20); |
| } |
| } else { |
| cfg->minor = -1; |
| cfg->lock_fd = -1; |
| } |
| |
| if (option_peer_max_bio_size && |
| command->function != &meta_create_md) { |
| fprintf(stderr, "The --peer-max-bio-size option is only allowed with create-md\n"); |
| exit(10); |
| } |
| if (option_al_stripes_used && |
| command->function != &meta_create_md && |
| command->function != &meta_restore_md) { |
| fprintf(stderr, "The --al-stripe* options are only allowed with create-md and restore-md\n"); |
| exit(10); |
| } |
| |
| /* at some point I'd like to go for this: (16*1024*1024/4) */ |
| if ((uint64_t)option_al_stripes * option_al_stripe_size_4k > (buffer_size/4096)) { |
| fprintf(stderr, "invalid (too large) al-stripe* settings\n"); |
| exit(10); |
| } |
| if (option_al_stripes * option_al_stripe_size_4k < 32/4) { |
| fprintf(stderr, "invalid (too small) al-stripe* settings\n"); |
| exit(10); |
| } |
| |
| if (option_node_id != -1 && !command->node_id_required) { |
| fprintf(stderr, "The %s command does not accept the --node-id option\n", |
| command->name); |
| exit(10); |
| } |
| |
| /* Hope this is sufficcient for backward compat */ |
| if (!is_v09(cfg) && command->node_id_required) { |
| if (option_node_id == -1) |
| option_node_id = 0; |
| else if (option_node_id != 0) |
| fprintf(stderr, "Not v09, implicitly set --node-id = 0\n"); |
| } |
| |
| if (option_node_id == -1 && command->node_id_required) { |
| fprintf(stderr, "The %s command requires the --node-id option\n", |
| command->name); |
| exit(10); |
| } |
| |
| rv = command->function(cfg, argv + ai, argc - ai); |
| if (minor_attached) |
| fprintf(stderr, "# Output might be stale, since minor %d is attached\n", cfg->minor); |
| |
| return rv; |
| /* and if we want an explicit free, |
| * this would be the place for it. |
| * free(cfg->md_device_name), free(cfg) ... |
| */ |
| } |