# zfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh - backupdr - Git at Google

 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #

 #
 # Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
 #

 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/redundancy/redundancy.kshlib

 #
 # DESCRIPTION:
 #	When sequentially resilvering a dRAID pool to a distributed spare,
 #	silent damage to an online vdev in a replacing or spare mirror vdev
 #	is not expected to be repaired.  Not only does the rebuild have no
 #	reason to suspect the silent damage but even if it did there's no
 #	checksum available to determine the correct copy and make the repair.
 #	However, the subsequent scrub should detect and repair any damage.
 #
 # STRATEGY:
 #	1. Create block device files for the test draid pool
 #	2. For each parity value [1..3]
 #		a. Create a draid pool
 #		b. Fill it with some directories/files
 #		c. Systematically damage and replace three devices by:
 #			- Overwrite the device
 #			- Replace the damaged vdev with a distributed spare
 #			- Scrub the pool and verify repair IO is issued
 #		d. Detach the distributed spares
 #		e. Scrub the pool and verify there was nothing to repair
 #		f. Destroy the draid pool
 #

 # Pool layout: number of file-backed vdevs and the size of each in MiB.
 typeset -r devs=7 dev_size_mb=512
 # Paths of the backing files that are handed to "zpool create".
 typeset -a disks

 # Record the current tunable values so that cleanup can restore them.
 rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
 prefetch_disable=$(get_tunable PREFETCH_DISABLE)

 #
 # Test teardown, registered through log_onexit.  Destroys the test pool
 # if it still exists, removes the backing files dev-0 .. dev-$devs from
 # $TEST_BASE_DIR, and puts the two tunables back to the values that were
 # recorded before the test changed them.
 #
 function cleanup
 {
 	# Keep the loop index local so it cannot clobber a global "i".
 	typeset i

 	if poolexists "$TESTPOOL"; then
 		destroy_pool "$TESTPOOL"
 	fi

 	# Arithmetic loop rather than {0..$devs}: the upper bound is a
 	# variable, and expanding a variable inside a brace range is a
 	# ksh93 behavior that other shells do not share.
 	for ((i = 0; i <= devs; i++)); do
 		rm -f "$TEST_BASE_DIR/dev-$i"
 	done

 	set_tunable32 PREFETCH_DISABLE "$prefetch_disable"
 	set_tunable32 REBUILD_SCRUB_ENABLED "$rebuild_scrub_enabled"
 }

 # Register the teardown handler before any state is changed.
 log_onexit cleanup

 # NOTE(review): rebuild scrub is presumably disabled so that only the
 # explicit "zpool scrub" in the test loop can repair the damage -- confirm.
 log_must set_tunable32 PREFETCH_DISABLE 1
 log_must set_tunable32 REBUILD_SCRUB_ENABLED 0

 # Disk files which will be used by pool
 for ((i = 0; i < devs; i++)); do
 	device=$TEST_BASE_DIR/dev-$i
 	log_must truncate -s "${dev_size_mb}M" "$device"
 	# Append, so the array is populated from index 0 with no gap.
 	disks+=("$device")
 done

 # One extra disk file, sized like the pool members.
 # NOTE(review): nothing else in this script appears to attach it; it is
 # only removed again by cleanup.
 log_must truncate -s "${dev_size_mb}M" "$TEST_BASE_DIR/dev-$devs"

 # Directory that "zpool import -d" searches for the file vdevs.
 dir=$TEST_BASE_DIR

 #
 # Main test: repeat the damage / replace / scrub cycle for each dRAID
 # parity level, using a pool with three distributed spares.
 #
 for nparity in 1 2 3; do
 	raid="draid${nparity}:3s"

 	log_must zpool create -f \
 	    -O compression=off \
 	    -o cachefile=none \
 	    $TESTPOOL $raid ${disks[@]}
 	# log_must zfs set primarycache=metadata $TESTPOOL

 	# Populate three datasets: one with the pool default (compression
 	# off), one compressed, and one compressed with an 8k recordsize.
 	for ds in fs fs2 fs3; do
 		case $ds in
 		fs)
 			log_must zfs create $TESTPOOL/$ds
 			;;
 		fs2)
 			log_must zfs create -o compress=on $TESTPOOL/$ds
 			;;
 		fs3)
 			log_must zfs create -o compress=on -o recordsize=8k \
 			    $TESTPOOL/$ds
 			;;
 		esac
 		log_must fill_fs /$TESTPOOL/$ds 1 256 10 1024 R
 	done

 	# Cycle the pool through export/import and confirm it is healthy
 	# before any damage is introduced.
 	log_must zpool export $TESTPOOL
 	log_must zpool import -o cachefile=none -d $dir $TESTPOOL
 	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

 	# Damage dev-0, dev-1 and dev-2 in turn, each time replacing the
 	# damaged file with the matching distributed spare.
 	for nspare in 0 1 2; do
 		victim=$dir/dev-$nspare
 		dspare=draid${nparity}-0-$nspare

 		# With the pool exported, zero the file from the 4 MiB
 		# offset through to its end.
 		log_must zpool export $TESTPOOL
 		log_must dd conv=notrunc if=/dev/zero of=$victim \
 		    bs=1M seek=4 count=$((dev_size_mb - 4))
 		log_must zpool import -o cachefile=none -d $dir $TESTPOOL

 		log_must zpool replace -fsw $TESTPOOL $victim $dspare

 		# The scrub that follows the sequential resilver must end
 		# without errors and must not report "repaired 0B".
 		log_must zpool scrub -w $TESTPOOL
 		log_must zpool status $TESTPOOL
 		log_must check_pool_status $TESTPOOL \
 		    "errors" "No known data errors"
 		log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
 		log_mustnot check_pool_status $TESTPOOL "scan" "repaired 0B"
 	done

 	# Every spare mirror, original device and distributed spare is
 	# expected to be ONLINE at this point.
 	for nspare in 0 1 2; do
 		log_must check_vdev_state \
 		    $TESTPOOL spare-${nspare} "ONLINE"
 		log_must check_vdev_state \
 		    $TESTPOOL ${dir}/dev-${nspare} "ONLINE"
 		log_must check_vdev_state \
 		    $TESTPOOL draid${nparity}-0-${nspare} "ONLINE"
 	done

 	# Drop the distributed spares again; a further scrub must then
 	# find nothing left to repair on the originally damaged vdevs.
 	for nspare in 0 1 2; do
 		log_must zpool detach $TESTPOOL draid${nparity}-0-${nspare}
 	done

 	log_must zpool clear $TESTPOOL
 	log_must zpool scrub -w $TESTPOOL
 	log_must zpool status $TESTPOOL

 	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
 	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
 	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"

 	log_must zpool destroy "$TESTPOOL"
 done

 log_pass "draid damaged device scrub test succeeded."
	#!/bin/ksh -p
	#
	# CDDL HEADER START
	#
	# The contents of this file are subject to the terms of the
	# Common Development and Distribution License (the "License").
	# You may not use this file except in compliance with the License.
	#
	# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
	# or http://www.opensolaris.org/os/licensing.
	# See the License for the specific language governing permissions
	# and limitations under the License.
	#
	# When distributing Covered Code, include this CDDL HEADER in each
	# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
	# If applicable, add the following below this CDDL HEADER, with the
	# fields enclosed by brackets "[]" replaced with your own identifying
	# information: Portions Copyright [yyyy] [name of copyright owner]
	#
	# CDDL HEADER END
	#

	#
	# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
	#

	. $STF_SUITE/include/libtest.shlib
	. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib

	#
	# DESCRIPTION:
	# When sequentially resilvering a dRAID pool to a distributed spare,
	# silent damage to an online vdev in a replacing or spare mirror vdev
	# is not expected to be repaired. Not only does the rebuild have no
	# reason to suspect the silent damage but even if it did there's no
	# checksum available to determine the correct copy and make the repair.
	# However, the subsequent scrub should detect and repair any damage.
	#
	# STRATEGY:
	#	1. Create block device files for the test draid pool
	#	2. For each parity value [1..3]
	#		a. Create a draid pool
	#		b. Fill it with some directories/files
	#		c. Systematically damage and replace three devices by:
	#			- Overwrite the device
	#			- Replace the damaged vdev with a distributed spare
	#			- Scrub the pool and verify repair IO is issued
	#		d. Detach the distributed spares
	#		e. Scrub the pool and verify there was nothing to repair
	#		f. Destroy the draid pool
	#

	# Pool layout: number of file-backed vdevs and the size of each in MiB.
	typeset -r devs=7 dev_size_mb=512
	# Paths of the backing files that are handed to "zpool create".
	typeset -a disks

	# Record the current tunable values so that cleanup can restore them.
	rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
	prefetch_disable=$(get_tunable PREFETCH_DISABLE)

	#
	# Test teardown, registered through log_onexit.  Destroys the test pool
	# if it still exists, removes the backing files dev-0 .. dev-$devs from
	# $TEST_BASE_DIR, and puts the two tunables back to the values that
	# were recorded before the test changed them.
	#
	function cleanup
	{
		# Keep the loop index local so it cannot clobber a global "i".
		typeset i

		if poolexists "$TESTPOOL"; then
			destroy_pool "$TESTPOOL"
		fi

		# Arithmetic loop rather than {0..$devs}: the upper bound is a
		# variable, and expanding a variable inside a brace range is a
		# ksh93 behavior that other shells do not share.
		for ((i = 0; i <= devs; i++)); do
			rm -f "$TEST_BASE_DIR/dev-$i"
		done

		set_tunable32 PREFETCH_DISABLE "$prefetch_disable"
		set_tunable32 REBUILD_SCRUB_ENABLED "$rebuild_scrub_enabled"
	}

	# Register the teardown handler before any state is changed.
	log_onexit cleanup

	# NOTE(review): rebuild scrub is presumably disabled so that only the
	# explicit "zpool scrub" in the test loop can repair the damage --
	# confirm.
	log_must set_tunable32 PREFETCH_DISABLE 1
	log_must set_tunable32 REBUILD_SCRUB_ENABLED 0

	# Disk files which will be used by pool
	for ((i = 0; i < devs; i++)); do
		device=$TEST_BASE_DIR/dev-$i
		log_must truncate -s "${dev_size_mb}M" "$device"
		# Append, so the array is populated from index 0 with no gap.
		disks+=("$device")
	done

	# One extra disk file, sized like the pool members.
	# NOTE(review): nothing else in this script appears to attach it; it
	# is only removed again by cleanup.
	log_must truncate -s "${dev_size_mb}M" "$TEST_BASE_DIR/dev-$devs"

	# Directory that "zpool import -d" searches for the file vdevs.
	dir=$TEST_BASE_DIR

	#
	# Main test: repeat the damage / replace / scrub cycle for each dRAID
	# parity level, using a pool with three distributed spares.
	#
	for nparity in 1 2 3; do
		raid="draid${nparity}:3s"

		log_must zpool create -f \
		    -O compression=off \
		    -o cachefile=none \
		    $TESTPOOL $raid ${disks[@]}
		# log_must zfs set primarycache=metadata $TESTPOOL

		# Populate three datasets: one with the pool default
		# (compression off), one compressed, and one compressed with
		# an 8k recordsize.
		for ds in fs fs2 fs3; do
			case $ds in
			fs)
				log_must zfs create $TESTPOOL/$ds
				;;
			fs2)
				log_must zfs create -o compress=on $TESTPOOL/$ds
				;;
			fs3)
				log_must zfs create -o compress=on \
				    -o recordsize=8k $TESTPOOL/$ds
				;;
			esac
			log_must fill_fs /$TESTPOOL/$ds 1 256 10 1024 R
		done

		# Cycle the pool through export/import and confirm it is
		# healthy before any damage is introduced.
		log_must zpool export $TESTPOOL
		log_must zpool import -o cachefile=none -d $dir $TESTPOOL
		log_must check_pool_status $TESTPOOL \
		    "errors" "No known data errors"

		# Damage dev-0, dev-1 and dev-2 in turn, each time replacing
		# the damaged file with the matching distributed spare.
		for nspare in 0 1 2; do
			victim=$dir/dev-$nspare
			dspare=draid${nparity}-0-$nspare

			# With the pool exported, zero the file from the
			# 4 MiB offset through to its end.
			log_must zpool export $TESTPOOL
			log_must dd conv=notrunc if=/dev/zero of=$victim \
			    bs=1M seek=4 count=$((dev_size_mb - 4))
			log_must zpool import -o cachefile=none -d $dir \
			    $TESTPOOL

			log_must zpool replace -fsw $TESTPOOL $victim $dspare

			# The scrub that follows the sequential resilver must
			# end without errors and must not report
			# "repaired 0B".
			log_must zpool scrub -w $TESTPOOL
			log_must zpool status $TESTPOOL
			log_must check_pool_status $TESTPOOL \
			    "errors" "No known data errors"
			log_must check_pool_status $TESTPOOL \
			    "scan" "with 0 errors"
			log_mustnot check_pool_status $TESTPOOL \
			    "scan" "repaired 0B"
		done

		# Every spare mirror, original device and distributed spare
		# is expected to be ONLINE at this point.
		for nspare in 0 1 2; do
			log_must check_vdev_state \
			    $TESTPOOL spare-${nspare} "ONLINE"
			log_must check_vdev_state \
			    $TESTPOOL ${dir}/dev-${nspare} "ONLINE"
			log_must check_vdev_state \
			    $TESTPOOL draid${nparity}-0-${nspare} "ONLINE"
		done

		# Drop the distributed spares again; a further scrub must
		# then find nothing left to repair on the originally damaged
		# vdevs.
		for nspare in 0 1 2; do
			log_must zpool detach $TESTPOOL \
			    draid${nparity}-0-${nspare}
		done

		log_must zpool clear $TESTPOOL
		log_must zpool scrub -w $TESTPOOL
		log_must zpool status $TESTPOOL

		log_must check_pool_status $TESTPOOL \
		    "errors" "No known data errors"
		log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
		log_must check_pool_status $TESTPOOL "scan" "repaired 0B"

		log_must zpool destroy "$TESTPOOL"
	done

	log_pass "draid damaged device scrub test succeeded."