| # |
| # This file and its contents are supplied under the terms of the |
| # Common Development and Distribution License ("CDDL"), version 1.0. |
| # You may only use this file in accordance with the terms of version |
| # 1.0 of the CDDL. |
| # |
| # A full copy of the text of the CDDL should have accompanied this |
| # source. A copy of the CDDL is also available via the Internet at |
| # http://www.illumos.org/license/CDDL. |
| # |
| |
| # |
| # Copyright (c) 2015, 2021 by Delphix. All rights reserved. |
| # Copyright (c) 2016, Intel Corporation. |
| # |
| |
| . $STF_SUITE/include/libtest.shlib |
| |
# Defaults common to all the tests in the regression group

# Seconds each fio invocation should run.
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
# Seed exported to the fio jobs as RANDSEED, for repeatable runs.
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
# Compressibility of generated data, exported to fio as
# COMPPERCENT (percentage) and COMPCHUNK (chunk size).
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}
# Sync types to iterate over: 1 is sync IO, 0 is async IO.
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}
| |
#
# Map a numeric sync flag to its human-readable name: 0 maps to
# "async", 1 maps to "sync", anything else yields an empty string.
#
function get_sync_str
{
	typeset flag=$1
	typeset label=''

	if (( flag == 0 )); then
		label='async'
	elif (( flag == 1 )); then
		label='sync'
	fi
	echo $label
}
| |
#
# Build the per-run suffix used in output filenames, encoding the
# thread count ($1), sync type ($2), I/O size ($3), and the number
# of filesystems currently recorded in $TESTFS.
#
function get_suffix
{
	typeset nthreads=$1
	typeset sync_flag=$2
	typeset blocksize=$3

	typeset mode=$(get_sync_str $sync_flag)
	typeset nfs_count=$(get_nfilesystems)

	echo "$mode.$blocksize-ios.$nthreads-threads.$nfs_count-filesystems"
}
| |
#
# Perform a single fio invocation.
#   $1 - fio job file (looked up under $FIO_SCRIPTS)
#   $2 - "true"/"false": destroy and recreate the perf pool first
#   $3 - "true"/"false": clear the ARC before starting the load
#   $4 - total number of fio threads
#   $5 - threads per filesystem (0 means one filesystem for all threads)
#   $6 - sync type (1 is sync IO, 0 is async IO)
#   $7 - I/O block size for this run
#
# Exports the environment variables the fio job files consume
# (RUNTIME, NUMJOBS, BLOCKSIZE, ...) and writes fio's output to a
# file in the perf output directory.
#
function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	# Splitting threads across multiple filesystems only makes
	# sense when the pool is being recreated for this run, since
	# that is what creates the filesystems.
	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	# $do_recreate is executed as a command, so it is expected to
	# be the literal string "true" or "false".
	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	# Optionally inject artificial per-device latency.
	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	# Environment consumed by the fio job files.
	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load, either remotely over NFS (fetching the
	# results back with scp) or locally.
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}
| |
#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of thread-per-filesystem counts; 0 means
#	all threads run against a single filesystem.
# PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	# One fio invocation per combination drawn from the four lists.
	for threads in $PERF_NTHREADS; do
	for threads_per_fs in $PERF_NTHREADS_PER_FS; do
	for sync in $PERF_SYNC_TYPES; do
	for iosize in $PERF_IOSIZES; do
		do_fio_run_impl $script $do_recreate $clear_cache \
		    $threads $threads_per_fs $sync $iosize
	done
	done
	done
	done
}
| |
# This function sets up the NFS mount on the client and makes sure all
# the correct permissions are in place.
#   $1 - fio job file to evaluate and copy to the client
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	# Create the mount point and drop any stale mount before
	# (re)mounting the share.  The umount is best-effort.
	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	# $jobnum is expanded by fio itself, so export the literal
	# string to keep it intact through the eval below.
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	# Point the job's "directory" option at the NFS mount.
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}
| |
#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
# $1 - suffix appended to every logfile name.
# Relies on the global "collect_scripts" array, which each test primes
# with its default command/tag pairs before calling this function.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	# Launch each collector in the background; timeout(1) stops it
	# once the fio run's allotted time has elapsed.
	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}
| |
# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset outdir="$(pwd)/perf_data"

	# Create the directory on first use.
	if [[ ! -d $outdir ]]; then
		mkdir -p $outdir
	fi

	echo $outdir
}
| |
#
# Apply each delay in the global ZINJECT_DELAYS array to every disk
# in $DISKS for the performance pool.
#
function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		# An empty entry means the test is misconfigured; abort
		# with a message.  (log_must would try to *execute* the
		# message string as a command — log_fail is the correct
		# primitive here.)
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}
| |
# Cancel all outstanding zinject handlers, releasing the holds they
# place on the pool.
function clear_zinject_delays
{
	log_must zinject -c all
}
| |
#
# Destroy and create the pool used for performance tests.
# Reads the $PERFPOOL and $DISKS globals; fails if $PERFPOOL is unset.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# This function handles the case where the pool already exists,
	# and will destroy the previous pool and recreate a new pool.
	#
	create_pool $PERFPOOL $DISKS
}
| |
#
# Sanity check the relationship between the total thread count ($1)
# and the requested number of threads per filesystem ($2).
#
function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	# The operands must be quoted: with an empty value, an unquoted
	# expansion would collapse to "test -n", a one-argument test
	# that is always true, so the check would never fail.
	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# Since the zero case returned above, any remaining value must
	# be strictly positive.
	#
	log_must test $threads_per_fs -gt 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}
| |
#
# Create $1 filesystems (default 1) in the performance pool, and record
# their names as a space-separated list in the exported TESTFS variable.
#
function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for ((i = 1; i <= nfilesystems; i++)); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		# Append, inserting a separating space after the first.
		TESTFS="${TESTFS:+$TESTFS }$dataset"
	done
}
| |
# Count the filesystems recorded in the space-separated $TESTFS list.
function get_nfilesystems
{
	# Word-split $TESTFS into the positional parameters and report
	# how many there are.
	set -- $TESTFS
	echo $#
}
| |
#
# Build the colon-separated list of mountpoints for the filesystems
# in $TESTFS, suitable for fio's "directory" option.
#
function get_directory
{
	typeset fs
	typeset directory=

	for fs in $TESTFS; do
		# NB: "mountpoint" is intentionally not typeset, to
		# match the original's scoping.
		mountpoint=$(get_prop mountpoint "$fs")
		directory="${directory:+$directory:}$mountpoint"
	done

	echo $directory
}
| |
#
# Print the minimum ARC size (arcstat c_min) in bytes, using the
# appropriate kstat interface for the current platform.
# NOTE(review): "typeset -l" is presumably ksh's long-integer
# attribute here (in bash it means lowercase) — confirm the intended
# interpreter.
#
function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		min_arc_size=`awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	# $? is the status of the command substitution executed in
	# whichever branch was taken above.
	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"

	echo $min_arc_size
}
| |
#
# Print the maximum ARC size (arcstat c_max) in bytes, using the
# appropriate kstat interface for the current platform.
#
function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		max_arc_size=`awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	# $? is the status of the command substitution executed in
	# whichever branch was taken above.
	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}
| |
#
# Print the current ARC target size (arcstat c) in bytes, using the
# appropriate kstat interface for the current platform.
#
function get_arc_target
{
	typeset -l arc_c

	if is_freebsd; then
		arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
	elif is_illumos; then
		arc_c=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		arc_c=`awk '$1 == "c" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	# $? is the status of the command substitution executed in
	# whichever branch was taken above.
	[[ $? -eq 0 ]] || log_fail "get_arc_target failed"

	echo $arc_c
}
| |
#
# Print the dbuf cache size in bytes.  On illumos this is read
# directly from the kernel; elsewhere it is derived from the ARC
# target shifted down by the DBUF_CACHE_SHIFT tunable.
#
function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi

	# $? is the status of the last command from the branch above.
	[[ $? -eq 0 ]] || log_fail "get_dbuf_cache_size failed"

	echo $dbuf_cache_size
}
| |
# Create a file with some information about how this system is configured,
# appended as a JSON object to $PERF_DATA_DIR/$1.  Covers CPU count,
# physical memory, ARC c_max, hostname, kernel version, per-disk sizes,
# and the active ZFS tunables.
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(nproc --all)\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		# On illumos, pull the same values out of the kernel
		# with a single dtrace invocation.
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		# Emit a "disks" object mapping device name to size.
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		# Emit a "tunables" object with the current module
		# parameter values; the first entry gets no leading comma.
		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		# illumos: scrape disk sizes from iostat -En output.
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		# Emit the "set name = value" entries from /etc/system.
		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi
	echo "}" >>$config
}
| |
#
# Compute a reasonable fio job count from the number of CPUs: use one
# job per CPU up to 8 CPUs, and 3/4 of the CPUs (integer division)
# beyond that.
#
function num_jobs_by_cpu
{
	if is_linux; then
		typeset ncpu=$($NPROC --all)
	else
		typeset ncpu=$(psrinfo | $WC -l)
	fi
	typeset num_jobs=$ncpu

	# Shell arithmetic truncates like bc's default scale of 0, so
	# there is no need to fork bc(1) for this.
	[[ $ncpu -gt 8 ]] && num_jobs=$((ncpu * 3 / 4))

	echo $num_jobs
}
| |
#
# Print a colon-delimited list of the leaf device identifiers backing
# pool $1.  On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		# Pick the cXtYdZ entries out of the pool's vdev listing.
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		# Match the common FreeBSD disk device name patterns.
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		         { printf "%s:", $1 }')
	elif is_linux; then
		# Match sd*, loop* and device-mapper names.
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}
| |
# Emit the per-run perf tunables so they appear in the test log.
function print_perf_settings
{
	printf '%s\n' \
	    "PERF_NTHREADS: $PERF_NTHREADS" \
	    "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS" \
	    "PERF_SYNC_TYPES: $PERF_SYNC_TYPES" \
	    "PERF_IOSIZES: $PERF_IOSIZES"
}
| |
# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
# Capture the system configuration only once per suite invocation.
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json