blob: 78eed0f4ce89a0ccd8bfccd64af1df68b74aeae2 [file] [log] [blame]
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#
#
# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events_common.kshlib
. $STF_SUITE/tests/functional/fault/fault.cfg
#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Physically detached device is
# made removed and onlined when reattached
#
# STRATEGY:
# 1. Create a pool
# 2. Simulate physical removal of one device
# 3. Verify the device is removed when detached
# 4. Reattach the device
# 5. Verify the device is onlined
# 6. Repeat the same tests with a spare device:
# zed will use the spare to handle the removed data device
# 7. Repeat the same tests again with a faulted spare device:
# the removed data device should be removed
#
# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
# conditions caused by mixing creation/removal events from partitioning the
# disk (zpool create) and events from physically removing it (remove_disk).
#
# NOTE: the test relies on ZED to transit state to removed on device removed
# event. The ZED does receive a removal notification but only relies on it to
# activate a hot spare. Additional work is planned to extend an existing ioctl
# interface to allow the ZED to transition the vdev in to a removed state.
#
verify_runnable "both"
if is_linux; then
# Add one 512b scsi_debug device (4Kn would generate IO errors)
# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
# add 32m of fudge
load_scsi_debug $(($MINVDEVSIZE/1024/1024+32)) 1 1 1 '512b'
else
log_unsupported "scsi debug module unsupported"
fi
function cleanup
{
destroy_pool $TESTPOOL
rm -f $filedev1
rm -f $filedev2
rm -f $filedev3
rm -f $sparedev
unload_scsi_debug
}
log_assert "ZED detects physically removed devices"
log_onexit cleanup
filedev1="$TEST_BASE_DIR/file-vdev-1"
filedev2="$TEST_BASE_DIR/file-vdev-2"
filedev3="$TEST_BASE_DIR/file-vdev-3"
sparedev="$TEST_BASE_DIR/file-vdev-spare"
removedev=$(get_debug_device)
typeset poolconfs=(
"mirror $filedev1 $removedev"
"raidz3 $filedev1 $filedev2 $filedev3 $removedev"
"mirror $filedev1 $filedev2 special mirror $filedev3 $removedev"
)
log_must truncate -s $MINVDEVSIZE $filedev1
log_must truncate -s $MINVDEVSIZE $filedev2
log_must truncate -s $MINVDEVSIZE $filedev3
log_must truncate -s $MINVDEVSIZE $sparedev
for conf in "${poolconfs[@]}"
do
# 1. Create a pool
log_must zpool create -f $TESTPOOL $conf
block_device_wait ${DEV_DSKDIR}/${removedev}
mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
log_fail "get_prop mountpoint /$TESTPOOL"
# 2. Simulate physical removal of one device
remove_disk $removedev
log_must mkfile 1m $mntpnt/file
log_must zpool sync $TESTPOOL
# 3. Verify the device is removed.
log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
# 4. Reattach the device
insert_disk $removedev
# 5. Verify the device is onlined
log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
# cleanup
destroy_pool $TESTPOOL
log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
block_device_wait ${DEV_DSKDIR}/${removedev}
done
# 6. Repeat the same tests with a spare device: zed will use the spare to handle
# the removed data device
for conf in "${poolconfs[@]}"
do
# special vdev can not be replaced by a hot spare
if [[ $conf = *"special mirror"* ]]; then
continue
fi
# 1. Create a pool with a spare
log_must zpool create -f $TESTPOOL $conf
block_device_wait ${DEV_DSKDIR}/${removedev}
log_must zpool add $TESTPOOL spare $sparedev
mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
log_fail "get_prop mountpoint /$TESTPOOL"
# 2. Simulate physical removal of one device
remove_disk $removedev
log_must mkfile 1m $mntpnt/file
log_must zpool sync $TESTPOOL
# 3. Verify the device is handled by the spare.
log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
# 4. Reattach the device
insert_disk $removedev
# 5. Verify the device is onlined
log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
# cleanup
destroy_pool $TESTPOOL
log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
block_device_wait ${DEV_DSKDIR}/${removedev}
done
# 7. Repeat the same tests again with a faulted spare device: zed should offline
# the removed data device if no spare is available
for conf in "${poolconfs[@]}"
do
# 1. Create a pool with a spare
log_must zpool create -f $TESTPOOL $conf
block_device_wait ${DEV_DSKDIR}/${removedev}
log_must zpool add $TESTPOOL spare $sparedev
mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
log_fail "get_prop mountpoint /$TESTPOOL"
# 2. Fault the spare device making it unavailable
log_must zpool offline -f $TESTPOOL $sparedev
log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"
# 3. Simulate physical removal of one device
remove_disk $removedev
log_must mkfile 1m $mntpnt/file
log_must zpool sync $TESTPOOL
# 4. Verify the device is removed
log_must wait_vdev_state $TESTPOOL $removedev "REMOVED"
# 5. Reattach the device
insert_disk $removedev
# 6. Verify the device is onlined
log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
# cleanup
destroy_pool $TESTPOOL
log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
block_device_wait ${DEV_DSKDIR}/${removedev}
done
log_pass "ZED detects physically removed devices"