| # |
| # This file and its contents are supplied under the terms of the |
| # Common Development and Distribution License ("CDDL"), version 1.0. |
| # You may only use this file in accordance with the terms of version |
| # 1.0 of the CDDL. |
| # |
| # A full copy of the text of the CDDL should have accompanied this |
| # source. A copy of the CDDL is also available via the Internet at |
| # http://www.illumos.org/license/CDDL. |
| # |
| |
| # |
| # Copyright 2009 Sun Microsystems, Inc. All rights reserved. |
| # Use is subject to license terms. |
| # Copyright (c) 2012, 2019 by Delphix. All rights reserved. |
| # Copyright 2016 Nexenta Systems, Inc. |
| # Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved. |
| # Copyright (c) 2017 Lawrence Livermore National Security, LLC. |
| # Copyright (c) 2017 Datto Inc. |
| # Copyright (c) 2017 Open-E, Inc. All Rights Reserved. |
| # Copyright 2019 Richard Elling |
| # |
| |
#
# Print the SCSI host number of the given disk.
#
# $1 disk name as found under /sys/block
#
# The entries of /sys/block/<disk>/device/scsi_device are listed and the
# part of each entry before the first ':' is written to stdout.
#
function get_scsi_host #disk
{
	typeset blkdev=$1
	typeset scsi_dir=/sys/block/${blkdev}/device/scsi_device

	ls $scsi_dir | cut -d : -f 1
}
| |
#
# Cause a scan of SCSI host adapters by writing '- - -' to the sysfs
# "scan" file of each selected adapter.
#
# $1 optional host number; when omitted, every entry matching
#    /sys/class/scsi_host/host* is scanned
#
# Does nothing unless is_linux succeeds. Each write goes through log_must.
#
function scan_scsi_hosts
{
	typeset hostnum=${1}
	# Keep the loop variable local so a caller's "host" is not clobbered.
	typeset host

	if is_linux; then
		if [[ -z $hostnum ]]; then
			for host in /sys/class/scsi_host/host*; do
				log_must eval "echo '- - -' > $host/scan"
			done
		else
			log_must eval \
			    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
		fi
	fi
}
| |
#
# Wait for newly created block devices to have their minors created.
# Any arguments are passed on to "udevadm trigger"; they are typically
# block device pathnames. This is useful when waiting on a specific device
# to settle rather than triggering all devices and waiting for them all.
#
# The udevadm settle timeout can be 120 or 180 seconds by default for
# some distros. If a long delay is experienced, it could be due to some
# strangeness in a malfunctioning device that isn't related to the devices
# under test. To help debug this condition, a notice is logged through
# log_note when the settle takes more than 60 seconds.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
	if is_linux; then
		udevadm trigger $*
		typeset start=$SECONDS
		udevadm settle
		typeset elapsed=$((SECONDS - start))
		# Compare arithmetically: inside [[ ]] the '>' operator sorts
		# strings, so "100" would not be considered greater than "60".
		if (( elapsed > 60 )); then
			log_note udevadm settle time too long: $elapsed
		fi
	fi
}
| |
#
# Check if the given device is a physical device.
#
# $1 device name; a leading $DEV_DSKDIR and then a leading $DEV_RDSKDIR
#    are stripped before testing
#
# When is_linux succeeds: true if $DEV_DSKDIR/<device> is a block device
# node and /sys/module/loop/parameters/max_part is a regular file.
# Otherwise: true if the name matches the pattern
# "^c[0-F]+([td][0-F]+)+$".
#
function is_physical_device #device
{
	typeset name=${1#$DEV_DSKDIR}
	name=${name#$DEV_RDSKDIR}

	if ! is_linux; then
		echo $name | egrep "^c[0-F]+([td][0-F]+)+$" > /dev/null 2>&1
		return $?
	fi

	[[ -b "$DEV_DSKDIR/$name" && -f /sys/module/loop/parameters/max_part ]]
	return $?
}
| |
#
# Check if the given device is a real device (ie SCSI device).
#
# $1 disk name, looked up as $DEV_RDSKDIR/<disk>
#
# Calls log_fail when no disk is given. When is_linux succeeds, the result
# is whether the TYPE column printed by lsblk contains "disk"; otherwise
# the function returns 0 without checking anything.
#
function is_real_device #disk
{
	typeset disk=$1

	if [[ -z $disk ]]; then
		log_fail "No argument for disk given."
	fi

	is_linux || return 0

	lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | egrep disk >/dev/null
	return $?
}
| |
#
# Check if the given device is a loop device.
#
# $1 disk name, looked up as $DEV_RDSKDIR/<disk>
#
# Calls log_fail when no disk is given. When is_linux succeeds, the result
# is whether the TYPE column printed by lsblk contains "loop"; otherwise
# the function returns 0 without checking anything.
#
function is_loop_device #disk
{
	typeset disk=$1

	if [[ -z $disk ]]; then
		log_fail "No argument for disk given."
	fi

	is_linux || return 0

	lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | egrep loop >/dev/null
	return $?
}
| |
#
# Check if the given device is a multipath device and if
# $DEV_MPATHDIR/<disk> is a symbolic link that readlink can resolve.
# Currently no support for dm devices alone without multipath.
#
# $1 disk name, looked up as $DEV_MPATHDIR/<disk>
#
# Calls log_fail when no disk is given. When is_linux succeeds: returns 1
# if lsblk does not report a TYPE containing "mpath", else the exit status
# of readlink. When is_linux fails the function returns 0.
#
function is_mpath_device #disk
{
	typeset disk=$1

	if [[ -z $disk ]]; then
		log_fail "No argument for disk given."
	fi

	is_linux || return 0

	if lsblk $DEV_MPATHDIR/$disk -o TYPE 2>/dev/null | \
	    egrep mpath >/dev/null; then
		readlink $DEV_MPATHDIR/$disk > /dev/null 2>&1
		return $?
	fi

	return 1
}
| |
#
# Set the slice prefix for disk partitioning depending
# on whether the device is a real, multipath, or loop device.
# Currently all disks have to be of the same type, so only
# checks first disk to determine slice prefix.
#
# Reads $DISKS and $DISK_ARRAY_NUM. Exports SLICE_PREFIX as:
#   ""   for a real device, or a multipath device whose 18th character
#        is not a digit
#   "p"  for any other multipath device or a loop device
# Any other device type ends in log_fail. Does nothing unless is_linux
# succeeds.
#
function set_slice_prefix
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		while (( i < $DISK_ARRAY_NUM )); do
			# Hand the shell index to awk explicitly; a bare "i"
			# inside the awk program is an unset awk variable.
			disk="$(echo $DISKS | nawk -v n=$((i + 1)) '{print $n}')"
			if ( is_mpath_device $disk ) && [[ -z $(echo $disk | \
			    awk 'substr($1,18,1) ~ /^[[:digit:]]+$/') ]] || \
			    ( is_real_device $disk ); then
				export SLICE_PREFIX=""
				return 0
			elif ( is_mpath_device $disk || is_loop_device \
			    $disk ); then
				export SLICE_PREFIX="p"
				return 0
			else
				log_fail "$disk not supported for partitioning."
			fi
			(( i = i + 1))
		done
	fi
}
| |
#
# Set the directory path of the listed devices in $DISK_ARRAY_NUM
# Currently all disks have to be of the same type, so only
# checks first disk to determine device directory
# default = /dev (linux)
# real disk = /dev (linux)
# multipath device = /dev/mapper (linux)
#
# Reads $DISKS and $DISK_ARRAY_NUM. Exports DEV_DSKDIR as $DEV_MPATHDIR
# when the first disk is a multipath device and as $DEV_RDSKDIR otherwise.
# When is_linux fails, DEV_DSKDIR is always set to $DEV_RDSKDIR.
#
function set_device_dir
{
	typeset disk
	typeset -i i=0

	if is_linux; then
		while (( i < $DISK_ARRAY_NUM )); do
			# Hand the shell index to awk explicitly; a bare "i"
			# inside the awk program is an unset awk variable.
			disk="$(echo $DISKS | nawk -v n=$((i + 1)) '{print $n}')"
			if is_mpath_device $disk; then
				export DEV_DSKDIR=$DEV_MPATHDIR
				return 0
			else
				export DEV_DSKDIR=$DEV_RDSKDIR
				return 0
			fi
			(( i = i + 1))
		done
	else
		export DEV_DSKDIR=$DEV_RDSKDIR
	fi
}
| |
#
# Print the directory path of the given device.
#
# $1 device name or path
#
# A physical device (per is_physical_device) yields $DEV_DSKDIR.
# Otherwise the last path component is removed (unless the argument is
# exactly "/"); if the remainder names a block device under $DEV_DSKDIR,
# $DEV_DSKDIR is printed, else the remainder itself.
#
function get_device_dir #device
{
	typeset dev=$1

	if is_physical_device $dev; then
		echo "$DEV_DSKDIR"
		return
	fi

	[[ $dev != "/" ]] && dev=${dev%/*}
	[[ -b "$DEV_DSKDIR/$dev" ]] && dev="$DEV_DSKDIR"
	echo $dev
}
| |
#
# Print the persistent name for the given disk.
#
# $1 device name, looked up as $DEV_DSKDIR/<device>
#
# For a real device the first "disk/by-id" entry reported by
# "udevadm info -q all" is used; for a multipath device the first
# "disk/by-id/dm-uuid" entry. In both cases the third '/'-separated
# component of the entry's second field is printed. For any other device,
# or when is_linux fails, the name is printed unchanged.
#
function get_persistent_disk_name #device
{
	typeset device=$1
	typeset dev_id
	typeset by_id_pattern

	if ! is_linux; then
		echo $device
		return
	fi

	if is_real_device $device; then
		by_id_pattern=disk/by-id
	elif is_mpath_device $device; then
		by_id_pattern=disk/by-id/dm-uuid
	else
		echo $device
		return
	fi

	dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
	    | egrep $by_id_pattern \
	    | nawk '{print $2; exit}' \
	    | nawk -F / '{print $3}')"
	echo $dev_id
}
| |
#
# Online or offline a disk on the system
#
# $1 disk name
# $2 requested state, "online" or "offline"
# $3 optional SCSI host number, passed to scan_scsi_hosts when onlining
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Offlining writes 'offline' to the device's sysfs "state" file
# and '1' to its "delete" file, then verifies that lsscsi no longer lists
# it. Online is a rescan of SCSI disks (every host entry when no host is
# given) followed by a check that lsscsi lists the disk again.
#
# Does nothing unless is_linux succeeds.
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3
	# Working variables are local so they do not leak into the caller.
	typeset dm_name slave slave_dir ss sd dev_state dev_delete
	typeset -i retries=0

	[[ -z $disk ]] || [[ -z $state ]] && \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			dm_name="$(readlink $DEV_DSKDIR/$disk \
			    | nawk -F / '{print $2}')"
			slave="$(ls /sys/block/${dm_name}/slaves \
			    | nawk '{print $1}')"
			# NOTE(review): if a slave stays listed under slaves/
			# but is not reported by lsscsi, this loop does not
			# make progress - confirm that cannot happen.
			while [[ -n $slave ]]; do
				#check if disk is online
				lsscsi | egrep $slave > /dev/null
				if (($? == 0)); then
					slave_dir="/sys/block/${dm_name}"
					slave_dir+="/slaves/${slave}/device"
					ss="${slave_dir}/state"
					sd="${slave_dir}/delete"
					log_must eval "echo 'offline' > ${ss}"
					log_must eval "echo '1' > ${sd}"
					lsscsi | egrep $slave > /dev/null
					if (($? == 0)); then
						log_fail "Offlining" \
						    "$disk failed"
					fi
				fi
				# Pick up the next remaining path, if any.
				slave="$(ls /sys/block/$dm_name/slaves \
				    2>/dev/null | nawk '{print $1}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			#check if disk is online
			lsscsi | egrep $disk > /dev/null
			if (($? == 0)); then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				lsscsi | egrep $disk > /dev/null
				if (($? == 0)); then
					log_fail "Offlining $disk" \
					    "failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$(readlink $DEV_DSKDIR/$disk \
				    | nawk -F / '{print $2}')"
				slave="$(ls /sys/block/$dm_name/slaves \
				    | nawk '{print $1}')"
				lsscsi | egrep $slave > /dev/null
				if (($? != 0)); then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				block_device_wait
				# Poll lsscsi once per second; give up on the
				# fourth miss.
				retries=0
				while ! lsscsi | egrep -q $disk; do
					if (( $retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			log_fail "$disk failed to $state"
		fi
	fi
}
| |
#
# Simulate disk removal: offline the disk via on_off_disk, then wait for
# block devices to settle.
#
# $1 disk name
#
function remove_disk #disk
{
	typeset target=$1

	on_off_disk $target "offline"
	block_device_wait
}
| |
#
# Simulate disk insertion for the given SCSI host: online the disk via
# on_off_disk, then wait for block devices to settle.
#
# $1 disk name
# $2 SCSI host number handed on to on_off_disk
#
function insert_disk #disk scsi_host
{
	typeset target=$1
	typeset hostnum=$2

	on_off_disk $target "online" $hostnum
	block_device_wait
}
| |
#
# Load scsi_debug module with specified parameters
#
# $1 dev_size_mb  value for the module's dev_size_mb parameter
# $2 add_host     value for add_host
# $3 num_tgts     value for num_tgts
# $4 max_luns     value for max_luns
# $5 blksz        one of: < 512b | 512e | 4Kn >, translated to
#                 sector_size/physblk_exp as 512/0, 512/3 and 4096/0
#
# Calls log_fail on missing arguments, an unsupported blksz, an already
# loaded module, or when no scsi_debug device shows up in lsscsi after
# loading. Calls log_unsupported when "modprobe -n scsi_debug" fails.
# The module is only touched when is_linux succeeds.
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1
	typeset hosts=$2
	typeset tgts=$3
	typeset luns=$4
	typeset blksz=$5
	typeset sector blkexp

	[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
	    [[ -z $luns ]] || [[ -z $blksz ]] && \
	    log_fail "Arguments invalid or missing"

	case "$blksz" in
	'512b')
		sector=512
		blkexp=0
		;;
	'512e')
		sector=512
		blkexp=3
		;;
	'4Kn')
		sector=4096
		blkexp=0
		;;
	*) log_fail "Unsupported blksz value: $blksz" ;;
	esac

	if is_linux; then
		# Dry run: only checks that the module could be loaded.
		modprobe -n scsi_debug
		if (($? != 0)); then
			log_unsupported "Platform does not have scsi_debug" \
			    "module"
		fi
		lsmod | egrep scsi_debug > /dev/null
		if (($? == 0)); then
			log_fail "scsi_debug module already installed"
		else
			log_must modprobe scsi_debug dev_size_mb=$devsize \
			    add_host=$hosts num_tgts=$tgts max_luns=$luns \
			    sector_size=$sector physblk_exp=$blkexp
			block_device_wait
			lsscsi | egrep scsi_debug > /dev/null
			if (($? == 1)); then
				log_fail "scsi_debug module install failed"
			fi
		fi
	fi
}
| |
#
# Unload the scsi_debug module with "modprobe -r".
#
# The removal is run through log_must_retry with the arguments "in use"
# and 5 (presumably: retry up to 5 times while the failure output
# contains "in use" - confirm against log_must_retry).
#
function unload_scsi_debug
{
	typeset -i attempts=5

	log_must_retry "in use" $attempts modprobe -r scsi_debug
}
| |
#
# Get scsi_debug device name.
# Prints the basename of the first scsi_debug device listed by lsscsi
# (for example "sdb").
#
# lsscsi is polled up to 10 times, one second apart, for as long as the
# value derived from its sixth column is "-". Whatever value was seen last
# is printed, so the output can be "-" or empty.
#
function get_debug_device
{
	# Keep the loop counter and result local; "i" and "val" are common
	# names and must not overwrite a caller's variables.
	typeset i val

	for i in {1..10} ; do
		val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3)

		# lsscsi can take time to settle
		if [ "$val" != "-" ] ; then
			break
		fi
		sleep 1
	done
	echo "$val"
}
| |
#
# Print the actual devices used by the pool (i.e. linux sdb1 not sdb).
#
# $1 pool name
# $2 device directory; lines of "zpool status -P" matching it are
#    selected and the "<devdir>/" prefix is stripped from the names
#
# The names are printed on one line separated by spaces. When is_linux
# fails an empty line is printed.
#
function get_pool_devices #testpool #devdir
{
	typeset pool=$1
	typeset dir=$2
	typeset devs=""

	if is_linux; then
		devs=$(zpool status -P $pool |grep ${dir} | awk '{print $1}')
		devs=$(echo $devs | sed -e "s|${dir}/||g" | tr '\n' ' ')
	fi

	echo $devs
}
| |
#
# Write to standard out giving the level, device name, offset and length
# of all blocks in an input file. The offset and length are in units of
# 512 byte blocks. In the case of mirrored vdevs, only the first
# device is listed, as the levels, blocks and offsets will be the same
# on other devices. Note that this function only works with mirrored
# or non-redundant pools, not raidz.
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
# $1 input_file  path of an existing regular file on a ZFS dataset;
#                log_fail is called when it does not exist
#
function list_file_blocks # input_file
{
	typeset input_file=$1

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset inum="$(stat -c '%i' $input_file)"

	#
	# Establish a mapping between vdev ids as shown in a DVA and the
	# pathnames they correspond to in ${VDEV_MAP[]}. The vdev id may
	# have any number of digits.
	#
	# Only the VDEV_MAP[n]=path lines survive "sed -n", so the
	# "typeset VDEV_MAP" line printed by awk never reaches eval.
	#
	# NOTE(review): the whitespace after '^' in the children pattern
	# must match the indentation zdb -C uses for the vdevs of interest
	# - confirm against real zdb output.
	#
	eval $(zdb -C $pool | awk '
	    BEGIN {
	        printf("typeset VDEV_MAP\n");
	        looking = 0;
	    }
	    /^ children/ {
	        id = $1;
	        looking = 1;
	    }
	    /path: / && looking == 1 {
	        print id" "$2;
	        looking = 0;
	    }
	' | sed -n 's/^children\[\([0-9][0-9]*\)\]: \(.*\)$/VDEV_MAP[\1]=\2/p')

	#
	# The awk below parses the output of zdb, printing out the level
	# of each block along with vdev id, offset and length. The last
	# two are converted to decimal in the while loop. 4M is added to
	# the offset to compensate for the first two labels and boot
	# block. Lastly, the offset and length are printed in units of
	# 512b blocks for ease of use with dd.
	#
	log_must zpool sync -f
	typeset level path offset length
	zdb -ddddd $ds $inum | awk -F: '
	    BEGIN { looking = 0 }
	    /^Indirect blocks:/ { looking = 1}
	    /^\t\tsegment / { looking = 0}
	    /L[0-8]/ && looking == 1 { print $0}
	' | sed -n 's/^.*\(L[0-9]\) \([0-9]*\):\([0-9a-f]*\):\([0-9a-f]*\) .*$/\1 \2 \3 \4/p' | \
	    while read level path offset length; do
		offset=$((16#$offset))		# Conversion from hex
		length=$((16#$length))
		offset="$(((offset + 4 * 1024 * 1024) / 512))"
		length="$((length / 512))"
		echo "$level ${VDEV_MAP[$path]} $offset $length"
	done 2>/dev/null
}
| |
#
# Overwrite every block of a file at one indirection level with random
# data.
#
# $1 input_file     path of an existing regular file; log_fail is called
#                   when it does not exist
# $2 corrupt_level  numeric block level to corrupt, default 0 (compared
#                   as "L<n>" against the output of list_file_blocks)
#
# For each matching line from list_file_blocks, dd writes <length>
# 512-byte blocks from /dev/urandom to the listed device at <offset>.
#
function corrupt_blocks_at_level # input_file corrupt_level
{
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length

	if [[ ! -f $input_file ]]; then
		log_fail "Couldn't find $input_file"
	fi

	log_must list_file_blocks $input_file | \
	    while read level path offset length; do
		[[ $level = $corrupt_level ]] || continue
		log_must dd if=/dev/urandom of=$path bs=512 \
		    count=$length seek=$offset conv=notrunc
	done

	# This is necessary for pools made of loop devices.
	sync
}